diff options
309 files changed, 6704 insertions, 2754 deletions
@@ -422,6 +422,7 @@ Tony Luck <tony.luck@intel.com> TripleX Chung <xxx.phy@gmail.com> <triplex@zh-kernel.org> TripleX Chung <xxx.phy@gmail.com> <zhongyu@18mail.cn> Tsuneo Yoshioka <Tsuneo.Yoshioka@f-secure.com> +Tudor Ambarus <tudor.ambarus@linaro.org> <tudor.ambarus@microchip.com> Tycho Andersen <tycho@tycho.pizza> <tycho@tycho.ws> Tzung-Bi Shih <tzungbi@kernel.org> <tzungbi@google.com> Uwe Kleine-König <ukleinek@informatik.uni-freiburg.de> diff --git a/Documentation/ABI/testing/debugfs-pktcdvd b/Documentation/ABI/testing/debugfs-pktcdvd new file mode 100644 index 000000000000..f6f65a4faea0 --- /dev/null +++ b/Documentation/ABI/testing/debugfs-pktcdvd @@ -0,0 +1,18 @@ +What: /sys/kernel/debug/pktcdvd/pktcdvd[0-7] +Date: Oct. 2006 +KernelVersion: 2.6.20 +Contact: Thomas Maier <balagi@justmail.de> +Description: + +The pktcdvd module (packet writing driver) creates +these files in debugfs: + +/sys/kernel/debug/pktcdvd/pktcdvd[0-7]/ + + ==== ====== ==================================== + info 0444 Lots of driver statistics and infos. + ==== ====== ==================================== + +Example:: + + cat /sys/kernel/debug/pktcdvd/pktcdvd0/info diff --git a/Documentation/ABI/testing/sysfs-class-pktcdvd b/Documentation/ABI/testing/sysfs-class-pktcdvd new file mode 100644 index 000000000000..ba1ce626591d --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-pktcdvd @@ -0,0 +1,97 @@ +sysfs interface +--------------- +The pktcdvd module (packet writing driver) creates the following files in the +sysfs: (<devid> is in the format major:minor) + +What: /sys/class/pktcdvd/add +What: /sys/class/pktcdvd/remove +What: /sys/class/pktcdvd/device_map +Date: Oct. 2006 +KernelVersion: 2.6.20 +Contact: Thomas Maier <balagi@justmail.de> +Description: + + ========== ============================================== + add (WO) Write a block device id (major:minor) to + create a new pktcdvd device and map it to the + block device. 
+ + remove (WO) Write the pktcdvd device id (major:minor) + to remove the pktcdvd device. + + device_map (RO) Shows the device mapping in format: + pktcdvd[0-7] <pktdevid> <blkdevid> + ========== ============================================== + + +What: /sys/class/pktcdvd/pktcdvd[0-7]/dev +What: /sys/class/pktcdvd/pktcdvd[0-7]/uevent +Date: Oct. 2006 +KernelVersion: 2.6.20 +Contact: Thomas Maier <balagi@justmail.de> +Description: + dev: (RO) Device id + + uevent: (WO) To send a uevent + + +What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/packets_started +What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/packets_finished +What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/kb_written +What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/kb_read +What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/kb_read_gather +What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/reset +Date: Oct. 2006 +KernelVersion: 2.6.20 +Contact: Thomas Maier <balagi@justmail.de> +Description: + packets_started: (RO) Number of started packets. + + packets_finished: (RO) Number of finished packets. + + kb_written: (RO) kBytes written. + + kb_read: (RO) kBytes read. + + kb_read_gather: (RO) kBytes read to fill write packets. + + reset: (WO) Write any value to it to reset + pktcdvd device statistic values, like + bytes read/written. + + +What: /sys/class/pktcdvd/pktcdvd[0-7]/write_queue/size +What: /sys/class/pktcdvd/pktcdvd[0-7]/write_queue/congestion_off +What: /sys/class/pktcdvd/pktcdvd[0-7]/write_queue/congestion_on +Date: Oct. 2006 +KernelVersion: 2.6.20 +Contact: Thomas Maier <balagi@justmail.de> +Description: + ============== ================================================ + size (RO) Contains the size of the bio write queue. + + congestion_off (RW) If bio write queue size is below this mark, + accept new bio requests from the block layer. 
+ + congestion_on (RW) If bio write queue size is higher as this + mark, do no longer accept bio write requests + from the block layer and wait till the pktcdvd + device has processed enough bio's so that bio + write queue size is below congestion off mark. + A value of <= 0 disables congestion control. + ============== ================================================ + + +Example: +-------- +To use the pktcdvd sysfs interface directly, you can do:: + + # create a new pktcdvd device mapped to /dev/hdc + echo "22:0" >/sys/class/pktcdvd/add + cat /sys/class/pktcdvd/device_map + # assuming device pktcdvd0 was created, look at stat's + cat /sys/class/pktcdvd/pktcdvd0/stat/kb_written + # print the device id of the mapped block device + fgrep pktcdvd0 /sys/class/pktcdvd/device_map + # remove device, using pktcdvd0 device id 253:0 + echo "253:0" >/sys/class/pktcdvd/remove diff --git a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-aes.yaml b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-aes.yaml index 0ccaab16dc61..0b7383b3106b 100644 --- a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-aes.yaml +++ b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-aes.yaml @@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Atmel Advanced Encryption Standard (AES) HW cryptographic accelerator maintainers: - - Tudor Ambarus <tudor.ambarus@microchip.com> + - Tudor Ambarus <tudor.ambarus@linaro.org> properties: compatible: diff --git a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-sha.yaml b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-sha.yaml index 5163c51b4547..ee2ffb034325 100644 --- a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-sha.yaml +++ b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-sha.yaml @@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Atmel Secure Hash Algorithm (SHA) HW cryptographic accelerator maintainers: - - Tudor 
Ambarus <tudor.ambarus@microchip.com> + - Tudor Ambarus <tudor.ambarus@linaro.org> properties: compatible: diff --git a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-tdes.yaml b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-tdes.yaml index fcc5adf03cad..3d6ed24b1b00 100644 --- a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-tdes.yaml +++ b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-tdes.yaml @@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Atmel Triple Data Encryption Standard (TDES) HW cryptographic accelerator maintainers: - - Tudor Ambarus <tudor.ambarus@microchip.com> + - Tudor Ambarus <tudor.ambarus@linaro.org> properties: compatible: diff --git a/Documentation/devicetree/bindings/soundwire/qcom,soundwire.yaml b/Documentation/devicetree/bindings/soundwire/qcom,soundwire.yaml index bcbfa71536cd..3efdc192ab01 100644 --- a/Documentation/devicetree/bindings/soundwire/qcom,soundwire.yaml +++ b/Documentation/devicetree/bindings/soundwire/qcom,soundwire.yaml @@ -80,7 +80,7 @@ properties: or applicable for the respective data port. More info in MIPI Alliance SoundWire 1.0 Specifications. minItems: 3 - maxItems: 5 + maxItems: 8 qcom,ports-sinterval-low: $ref: /schemas/types.yaml#/definitions/uint8-array @@ -124,7 +124,7 @@ properties: or applicable for the respective data port. More info in MIPI Alliance SoundWire 1.0 Specifications. minItems: 3 - maxItems: 5 + maxItems: 8 qcom,ports-block-pack-mode: $ref: /schemas/types.yaml#/definitions/uint8-array @@ -154,7 +154,7 @@ properties: or applicable for the respective data port. More info in MIPI Alliance SoundWire 1.0 Specifications. minItems: 3 - maxItems: 5 + maxItems: 8 items: oneOf: - minimum: 0 @@ -171,7 +171,7 @@ properties: or applicable for the respective data port. More info in MIPI Alliance SoundWire 1.0 Specifications. 
minItems: 3 - maxItems: 5 + maxItems: 8 items: oneOf: - minimum: 0 @@ -187,7 +187,7 @@ properties: or applicable for the respective data port. More info in MIPI Alliance SoundWire 1.0 Specifications. minItems: 3 - maxItems: 5 + maxItems: 8 items: oneOf: - minimum: 0 diff --git a/Documentation/devicetree/bindings/spi/atmel,at91rm9200-spi.yaml b/Documentation/devicetree/bindings/spi/atmel,at91rm9200-spi.yaml index 4dd973e341e6..6c57dd6c3a36 100644 --- a/Documentation/devicetree/bindings/spi/atmel,at91rm9200-spi.yaml +++ b/Documentation/devicetree/bindings/spi/atmel,at91rm9200-spi.yaml @@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Atmel SPI device maintainers: - - Tudor Ambarus <tudor.ambarus@microchip.com> + - Tudor Ambarus <tudor.ambarus@linaro.org> allOf: - $ref: spi-controller.yaml# diff --git a/Documentation/devicetree/bindings/spi/atmel,quadspi.yaml b/Documentation/devicetree/bindings/spi/atmel,quadspi.yaml index 1d493add4053..b0d99bc10535 100644 --- a/Documentation/devicetree/bindings/spi/atmel,quadspi.yaml +++ b/Documentation/devicetree/bindings/spi/atmel,quadspi.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Atmel Quad Serial Peripheral Interface (QSPI) maintainers: - - Tudor Ambarus <tudor.ambarus@microchip.com> + - Tudor Ambarus <tudor.ambarus@linaro.org> allOf: - $ref: spi-controller.yaml# diff --git a/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml b/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml index ead2cccf658f..9a60c0664bbe 100644 --- a/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml +++ b/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml @@ -44,9 +44,9 @@ properties: description: Maximum SPI clocking speed of the device in Hz. 
- spi-cs-setup-ns: + spi-cs-setup-delay-ns: description: - Delay in nanosecods to be introduced by the controller after CS is + Delay in nanoseconds to be introduced by the controller after CS is asserted. spi-rx-bus-width: diff --git a/Documentation/networking/rxrpc.rst b/Documentation/networking/rxrpc.rst index 39494a6ea739..e1af54424192 100644 --- a/Documentation/networking/rxrpc.rst +++ b/Documentation/networking/rxrpc.rst @@ -880,8 +880,8 @@ The kernel interface functions are as follows: notify_end_rx can be NULL or it can be used to specify a function to be called when the call changes state to end the Tx phase. This function is - called with the call-state spinlock held to prevent any reply or final ACK - from being delivered first. + called with a spinlock held to prevent the last DATA packet from being + transmitted until the function returns. (#) Receive data from a call:: diff --git a/MAINTAINERS b/MAINTAINERS index ba2e1feb83db..c1062b42e680 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13620,7 +13620,7 @@ F: arch/microblaze/ MICROCHIP AT91 DMA DRIVERS M: Ludovic Desroches <ludovic.desroches@microchip.com> -M: Tudor Ambarus <tudor.ambarus@microchip.com> +M: Tudor Ambarus <tudor.ambarus@linaro.org> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: dmaengine@vger.kernel.org S: Supported @@ -13665,7 +13665,7 @@ F: Documentation/devicetree/bindings/media/microchip,csi2dc.yaml F: drivers/media/platform/microchip/microchip-csi2dc.c MICROCHIP ECC DRIVER -M: Tudor Ambarus <tudor.ambarus@microchip.com> +M: Tudor Ambarus <tudor.ambarus@linaro.org> L: linux-crypto@vger.kernel.org S: Maintained F: drivers/crypto/atmel-ecc.* @@ -13762,7 +13762,7 @@ S: Maintained F: drivers/mmc/host/atmel-mci.c MICROCHIP NAND DRIVER -M: Tudor Ambarus <tudor.ambarus@microchip.com> +M: Tudor Ambarus <tudor.ambarus@linaro.org> L: linux-mtd@lists.infradead.org S: Supported F: Documentation/devicetree/bindings/mtd/atmel-nand.txt @@ -13814,7 +13814,7 @@ S: 
Supported F: drivers/power/reset/at91-sama5d2_shdwc.c MICROCHIP SPI DRIVER -M: Tudor Ambarus <tudor.ambarus@microchip.com> +M: Tudor Ambarus <tudor.ambarus@linaro.org> S: Supported F: drivers/spi/spi-atmel.* @@ -16617,6 +16617,13 @@ S: Supported F: Documentation/devicetree/bindings/input/pine64,pinephone-keyboard.yaml F: drivers/input/keyboard/pinephone-keyboard.c +PKTCDVD DRIVER +M: linux-block@vger.kernel.org +S: Orphan +F: drivers/block/pktcdvd.c +F: include/linux/pktcdvd.h +F: include/uapi/linux/pktcdvd.h + PLANTOWER PMS7003 AIR POLLUTION SENSOR DRIVER M: Tomasz Duszynski <tduszyns@gmail.com> S: Maintained @@ -19679,7 +19686,7 @@ F: drivers/clk/spear/ F: drivers/pinctrl/spear/ SPI NOR SUBSYSTEM -M: Tudor Ambarus <tudor.ambarus@microchip.com> +M: Tudor Ambarus <tudor.ambarus@linaro.org> M: Pratyush Yadav <pratyush@kernel.org> R: Michael Walle <michael@walle.cc> L: linux-mtd@lists.infradead.org @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 2 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* @@ -1986,7 +1986,7 @@ $(single-no-ko): $(build-dir) # Remove MODORDER when done because it is not the real one. 
PHONY += single_modules single_modules: $(single-no-ko) modules_prepare - $(Q){ $(foreach m, $(single-ko), echo $(extmod_prefix)$m;) } > $(MODORDER) + $(Q){ $(foreach m, $(single-ko), echo $(extmod_prefix)$(m:%.ko=%.o);) } > $(MODORDER) $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost ifneq ($(KBUILD_MODPOST_NOFINAL),1) $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modfinal diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index aecc403b2880..7f092cb55a41 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -128,15 +128,16 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, #define TIF_NEED_RESCHED 1 /* rescheduling necessary */ #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ #define TIF_UPROBE 3 /* breakpointed or singlestepping */ -#define TIF_SYSCALL_TRACE 4 /* syscall trace active */ -#define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */ -#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ -#define TIF_SECCOMP 7 /* seccomp syscall filtering active */ -#define TIF_NOTIFY_SIGNAL 8 /* signal notifications exist */ +#define TIF_NOTIFY_SIGNAL 4 /* signal notifications exist */ #define TIF_USING_IWMMXT 17 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ -#define TIF_RESTORE_SIGMASK 20 +#define TIF_RESTORE_SIGMASK 19 +#define TIF_SYSCALL_TRACE 20 /* syscall trace active */ +#define TIF_SYSCALL_AUDIT 21 /* syscall auditing active */ +#define TIF_SYSCALL_TRACEPOINT 22 /* syscall tracepoint instrumentation */ +#define TIF_SECCOMP 23 /* seccomp syscall filtering active */ + #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) diff --git a/arch/arm64/crypto/sm4-ce-ccm-core.S b/arch/arm64/crypto/sm4-ce-ccm-core.S index 028207c4afd0..fa85856f33ce 100644 --- a/arch/arm64/crypto/sm4-ce-ccm-core.S +++ b/arch/arm64/crypto/sm4-ce-ccm-core.S @@ -8,6 +8,7 @@ */ #include <linux/linkage.h> +#include 
<linux/cfi_types.h> #include <asm/assembler.h> #include "sm4-ce-asm.h" @@ -104,7 +105,7 @@ SYM_FUNC_START(sm4_ce_ccm_final) SYM_FUNC_END(sm4_ce_ccm_final) .align 3 -SYM_FUNC_START(sm4_ce_ccm_enc) +SYM_TYPED_FUNC_START(sm4_ce_ccm_enc) /* input: * x0: round key array, CTX * x1: dst @@ -216,7 +217,7 @@ SYM_FUNC_START(sm4_ce_ccm_enc) SYM_FUNC_END(sm4_ce_ccm_enc) .align 3 -SYM_FUNC_START(sm4_ce_ccm_dec) +SYM_TYPED_FUNC_START(sm4_ce_ccm_dec) /* input: * x0: round key array, CTX * x1: dst diff --git a/arch/arm64/crypto/sm4-ce-gcm-core.S b/arch/arm64/crypto/sm4-ce-gcm-core.S index 7aa3ec18a289..347f25d75727 100644 --- a/arch/arm64/crypto/sm4-ce-gcm-core.S +++ b/arch/arm64/crypto/sm4-ce-gcm-core.S @@ -9,6 +9,7 @@ */ #include <linux/linkage.h> +#include <linux/cfi_types.h> #include <asm/assembler.h> #include "sm4-ce-asm.h" @@ -370,7 +371,7 @@ SYM_FUNC_START(pmull_ghash_update) SYM_FUNC_END(pmull_ghash_update) .align 3 -SYM_FUNC_START(sm4_ce_pmull_gcm_enc) +SYM_TYPED_FUNC_START(sm4_ce_pmull_gcm_enc) /* input: * x0: round key array, CTX * x1: dst @@ -581,7 +582,7 @@ SYM_FUNC_END(sm4_ce_pmull_gcm_enc) #define RH3 v20 .align 3 -SYM_FUNC_START(sm4_ce_pmull_gcm_dec) +SYM_TYPED_FUNC_START(sm4_ce_pmull_gcm_dec) /* input: * x0: round key array, CTX * x1: dst diff --git a/arch/mips/ralink/of.c b/arch/mips/ralink/of.c index 01c132bc33d5..4d06de77d92a 100644 --- a/arch/mips/ralink/of.c +++ b/arch/mips/ralink/of.c @@ -64,7 +64,7 @@ void __init plat_mem_setup(void) dtb = get_fdt(); __dt_setup_arch(dtb); - if (!early_init_dt_scan_memory()) + if (early_init_dt_scan_memory()) return; if (soc_info.mem_detect) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 8c3862b4c259..958e77a24f85 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -8,6 +8,7 @@ #define BSS_FIRST_SECTIONS *(.bss.prominit) #define EMITS_PT_NOTE #define RO_EXCEPTION_TABLE_ALIGN 0 +#define RUNTIME_DISCARD_EXIT #define SOFT_MASK_TABLE(align) \ . 
= ALIGN(align); \ @@ -410,9 +411,12 @@ SECTIONS DISCARDS /DISCARD/ : { *(*.EMB.apuinfo) - *(.glink .iplt .plt .rela* .comment) + *(.glink .iplt .plt) *(.gnu.version*) *(.gnu.attributes) *(.eh_frame) +#ifndef CONFIG_RELOCATABLE + *(.rela*) +#endif } } diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index 855450bed9f5..ec0cab9fbddd 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -165,7 +165,7 @@ do { \ might_fault(); \ access_ok(__p, sizeof(*__p)) ? \ __get_user((x), __p) : \ - ((x) = 0, -EFAULT); \ + ((x) = (__force __typeof__(x))0, -EFAULT); \ }) #define __put_user_asm(insn, x, ptr, err) \ diff --git a/arch/riscv/kernel/probes/simulate-insn.h b/arch/riscv/kernel/probes/simulate-insn.h index cb6ff7dccb92..de8474146a9b 100644 --- a/arch/riscv/kernel/probes/simulate-insn.h +++ b/arch/riscv/kernel/probes/simulate-insn.h @@ -31,9 +31,9 @@ __RISCV_INSN_FUNCS(fence, 0x7f, 0x0f); } while (0) __RISCV_INSN_FUNCS(c_j, 0xe003, 0xa001); -__RISCV_INSN_FUNCS(c_jr, 0xf007, 0x8002); +__RISCV_INSN_FUNCS(c_jr, 0xf07f, 0x8002); __RISCV_INSN_FUNCS(c_jal, 0xe003, 0x2001); -__RISCV_INSN_FUNCS(c_jalr, 0xf007, 0x9002); +__RISCV_INSN_FUNCS(c_jalr, 0xf07f, 0x9002); __RISCV_INSN_FUNCS(c_beqz, 0xe003, 0xc001); __RISCV_INSN_FUNCS(c_bnez, 0xe003, 0xe001); __RISCV_INSN_FUNCS(c_ebreak, 0xffff, 0x9002); diff --git a/arch/s390/boot/decompressor.c b/arch/s390/boot/decompressor.c index e27c2140d620..8dcd7af2911a 100644 --- a/arch/s390/boot/decompressor.c +++ b/arch/s390/boot/decompressor.c @@ -23,9 +23,9 @@ #define memmove memmove #define memzero(s, n) memset((s), 0, (n)) -#ifdef CONFIG_KERNEL_BZIP2 +#if defined(CONFIG_KERNEL_BZIP2) #define BOOT_HEAP_SIZE 0x400000 -#elif CONFIG_KERNEL_ZSTD +#elif defined(CONFIG_KERNEL_ZSTD) #define BOOT_HEAP_SIZE 0x30000 #else #define BOOT_HEAP_SIZE 0x10000 diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index a7b4e1d82758..74b35ec2ad28 100644 --- 
a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -190,7 +190,6 @@ CONFIG_NFT_CT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m CONFIG_NFT_NAT=m -CONFIG_NFT_OBJREF=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NFT_HASH=m @@ -569,6 +568,7 @@ CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set CONFIG_LEGACY_PTY_COUNT=0 +# CONFIG_LEGACY_TIOCSTI is not set CONFIG_VIRTIO_CONSOLE=m CONFIG_HW_RANDOM_VIRTIO=m CONFIG_HANGCHECK_TIMER=m @@ -660,6 +660,7 @@ CONFIG_CONFIGFS_FS=m CONFIG_ECRYPT_FS=m CONFIG_CRAMFS=m CONFIG_SQUASHFS=m +CONFIG_SQUASHFS_CHOICE_DECOMP_BY_MOUNT=y CONFIG_SQUASHFS_XATTR=y CONFIG_SQUASHFS_LZ4=y CONFIG_SQUASHFS_LZO=y @@ -705,6 +706,7 @@ CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y CONFIG_SECURITY_LANDLOCK=y CONFIG_INTEGRITY_SIGNATURE=y CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y +CONFIG_INTEGRITY_PLATFORM_KEYRING=y CONFIG_IMA=y CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y @@ -781,6 +783,7 @@ CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_DEV_VIRTIO=m +CONFIG_SYSTEM_BLACKLIST_KEYRING=y CONFIG_CORDIC=m CONFIG_CRYPTO_LIB_CURVE25519=m CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m @@ -848,7 +851,6 @@ CONFIG_PREEMPT_TRACER=y CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_HIST_TRIGGERS=y CONFIG_FTRACE_STARTUP_TEST=y # CONFIG_EVENT_TRACE_STARTUP_TEST is not set @@ -870,7 +872,6 @@ CONFIG_FAIL_MAKE_REQUEST=y CONFIG_FAIL_IO_TIMEOUT=y CONFIG_FAIL_FUTEX=y CONFIG_FAULT_INJECTION_DEBUG_FS=y -CONFIG_FAIL_FUNCTION=y CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y CONFIG_LKDTM=m CONFIG_TEST_MIN_HEAP=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 2bc2d0fe5774..cec71268e3bc 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -181,7 +181,6 @@ CONFIG_NFT_CT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m CONFIG_NFT_NAT=m -CONFIG_NFT_OBJREF=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NFT_HASH=m @@ -559,6 
+558,7 @@ CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set CONFIG_LEGACY_PTY_COUNT=0 +# CONFIG_LEGACY_TIOCSTI is not set CONFIG_VIRTIO_CONSOLE=m CONFIG_HW_RANDOM_VIRTIO=m CONFIG_HANGCHECK_TIMER=m @@ -645,6 +645,7 @@ CONFIG_CONFIGFS_FS=m CONFIG_ECRYPT_FS=m CONFIG_CRAMFS=m CONFIG_SQUASHFS=m +CONFIG_SQUASHFS_CHOICE_DECOMP_BY_MOUNT=y CONFIG_SQUASHFS_XATTR=y CONFIG_SQUASHFS_LZ4=y CONFIG_SQUASHFS_LZO=y @@ -688,6 +689,7 @@ CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y CONFIG_SECURITY_LANDLOCK=y CONFIG_INTEGRITY_SIGNATURE=y CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y +CONFIG_INTEGRITY_PLATFORM_KEYRING=y CONFIG_IMA=y CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y @@ -766,6 +768,7 @@ CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_DEV_VIRTIO=m +CONFIG_SYSTEM_BLACKLIST_KEYRING=y CONFIG_CORDIC=m CONFIG_PRIME_NUMBERS=m CONFIG_CRYPTO_LIB_CURVE25519=m @@ -798,7 +801,6 @@ CONFIG_STACK_TRACER=y CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_HIST_TRIGGERS=y CONFIG_SAMPLES=y CONFIG_SAMPLE_TRACE_PRINTK=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index ae14ab0b864d..a9c0c81d1de9 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -13,7 +13,6 @@ CONFIG_TUNE_ZEC12=y # CONFIG_COMPAT is not set CONFIG_NR_CPUS=2 CONFIG_HZ_100=y -# CONFIG_RELOCATABLE is not set # CONFIG_CHSC_SCH is not set # CONFIG_SCM_BUS is not set CONFIG_CRASH_DUMP=y @@ -50,6 +49,7 @@ CONFIG_ZFCP=y # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set +# CONFIG_LEGACY_TIOCSTI is not set # CONFIG_HVC_IUCV is not set # CONFIG_HW_RANDOM_S390 is not set # CONFIG_HMC_DRV is not set diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index feaba12dbecb..efa103b52a1a 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -131,19 +131,21 @@ 
struct hws_combined_entry { struct hws_diag_entry diag; /* Diagnostic-sampling data entry */ } __packed; -struct hws_trailer_entry { - union { - struct { - unsigned int f:1; /* 0 - Block Full Indicator */ - unsigned int a:1; /* 1 - Alert request control */ - unsigned int t:1; /* 2 - Timestamp format */ - unsigned int :29; /* 3 - 31: Reserved */ - unsigned int bsdes:16; /* 32-47: size of basic SDE */ - unsigned int dsdes:16; /* 48-63: size of diagnostic SDE */ - }; - unsigned long long flags; /* 0 - 63: All indicators */ +union hws_trailer_header { + struct { + unsigned int f:1; /* 0 - Block Full Indicator */ + unsigned int a:1; /* 1 - Alert request control */ + unsigned int t:1; /* 2 - Timestamp format */ + unsigned int :29; /* 3 - 31: Reserved */ + unsigned int bsdes:16; /* 32-47: size of basic SDE */ + unsigned int dsdes:16; /* 48-63: size of diagnostic SDE */ + unsigned long long overflow; /* 64 - Overflow Count */ }; - unsigned long long overflow; /* 64 - sample Overflow count */ + __uint128_t val; +}; + +struct hws_trailer_entry { + union hws_trailer_header header; /* 0 - 15 Flags + Overflow Count */ unsigned char timestamp[16]; /* 16 - 31 timestamp */ unsigned long long reserved1; /* 32 -Reserved */ unsigned long long reserved2; /* */ @@ -290,14 +292,11 @@ static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi, return USEC_PER_SEC * qsi->cpu_speed / rate; } -#define SDB_TE_ALERT_REQ_MASK 0x4000000000000000UL -#define SDB_TE_BUFFER_FULL_MASK 0x8000000000000000UL - /* Return TOD timestamp contained in an trailer entry */ static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te) { /* TOD in STCKE format */ - if (te->t) + if (te->header.t) return *((unsigned long long *) &te->timestamp[1]); /* TOD in STCK format */ diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index 77f24262c25c..ac665b9670c5 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -4,8 +4,8 @@ * 
* Copyright IBM Corp. 1999, 2020 */ -#ifndef DEBUG_H -#define DEBUG_H +#ifndef _ASM_S390_DEBUG_H +#define _ASM_S390_DEBUG_H #include <linux/string.h> #include <linux/spinlock.h> @@ -487,4 +487,4 @@ void debug_register_static(debug_info_t *id, int pages_per_area, int nr_areas); #endif /* MODULE */ -#endif /* DEBUG_H */ +#endif /* _ASM_S390_DEBUG_H */ diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h index cb5fc0690435..081837b391e3 100644 --- a/arch/s390/include/asm/percpu.h +++ b/arch/s390/include/asm/percpu.h @@ -31,7 +31,7 @@ pcp_op_T__ *ptr__; \ preempt_disable_notrace(); \ ptr__ = raw_cpu_ptr(&(pcp)); \ - prev__ = *ptr__; \ + prev__ = READ_ONCE(*ptr__); \ do { \ old__ = prev__; \ new__ = old__ op (val); \ diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index fc6d5f58debe..2df94d32140c 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -187,8 +187,6 @@ static int kexec_file_add_ipl_report(struct kimage *image, data->memsz = ALIGN(data->memsz, PAGE_SIZE); buf.mem = data->memsz; - if (image->type == KEXEC_TYPE_CRASH) - buf.mem += crashk_res.start; ptr = (void *)ipl_cert_list_addr; end = ptr + ipl_cert_list_size; @@ -225,6 +223,9 @@ static int kexec_file_add_ipl_report(struct kimage *image, data->kernel_buf + offsetof(struct lowcore, ipl_parmblock_ptr); *lc_ipl_parmblock_ptr = (__u32)buf.mem; + if (image->type == KEXEC_TYPE_CRASH) + buf.mem += crashk_res.start; + ret = kexec_add_buffer(&buf); out: return ret; diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 332a49965130..ce886a03545a 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -163,14 +163,15 @@ static void free_sampling_buffer(struct sf_buffer *sfb) static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags) { - unsigned long sdb, *trailer; + struct hws_trailer_entry *te; + unsigned long sdb; /* Allocate and 
initialize sample-data-block */ sdb = get_zeroed_page(gfp_flags); if (!sdb) return -ENOMEM; - trailer = trailer_entry_ptr(sdb); - *trailer = SDB_TE_ALERT_REQ_MASK; + te = (struct hws_trailer_entry *)trailer_entry_ptr(sdb); + te->header.a = 1; /* Link SDB into the sample-data-block-table */ *sdbt = sdb; @@ -1206,7 +1207,7 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, "%s: Found unknown" " sampling data entry: te->f %i" " basic.def %#4x (%p)\n", __func__, - te->f, sample->def, sample); + te->header.f, sample->def, sample); /* Sample slot is not yet written or other record. * * This condition can occur if the buffer was reused @@ -1217,7 +1218,7 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, * that are not full. Stop processing if the first * invalid format was detected. */ - if (!te->f) + if (!te->header.f) break; } @@ -1227,6 +1228,16 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, } } +static inline __uint128_t __cdsg(__uint128_t *ptr, __uint128_t old, __uint128_t new) +{ + asm volatile( + " cdsg %[old],%[new],%[ptr]\n" + : [old] "+d" (old), [ptr] "+QS" (*ptr) + : [new] "d" (new) + : "memory", "cc"); + return old; +} + /* hw_perf_event_update() - Process sampling buffer * @event: The perf event * @flush_all: Flag to also flush partially filled sample-data-blocks @@ -1243,10 +1254,11 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, */ static void hw_perf_event_update(struct perf_event *event, int flush_all) { + unsigned long long event_overflow, sampl_overflow, num_sdb; + union hws_trailer_header old, prev, new; struct hw_perf_event *hwc = &event->hw; struct hws_trailer_entry *te; unsigned long *sdbt; - unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags; int done; /* @@ -1266,25 +1278,25 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) te = (struct hws_trailer_entry *) 
trailer_entry_ptr(*sdbt); /* Leave loop if no more work to do (block full indicator) */ - if (!te->f) { + if (!te->header.f) { done = 1; if (!flush_all) break; } /* Check the sample overflow count */ - if (te->overflow) + if (te->header.overflow) /* Account sample overflows and, if a particular limit * is reached, extend the sampling buffer. * For details, see sfb_account_overflows(). */ - sampl_overflow += te->overflow; + sampl_overflow += te->header.overflow; /* Timestamps are valid for full sample-data-blocks only */ debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx " "overflow %llu timestamp %#llx\n", - __func__, (unsigned long)sdbt, te->overflow, - (te->f) ? trailer_timestamp(te) : 0ULL); + __func__, (unsigned long)sdbt, te->header.overflow, + (te->header.f) ? trailer_timestamp(te) : 0ULL); /* Collect all samples from a single sample-data-block and * flag if an (perf) event overflow happened. If so, the PMU @@ -1294,12 +1306,16 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) num_sdb++; /* Reset trailer (using compare-double-and-swap) */ + /* READ_ONCE() 16 byte header */ + prev.val = __cdsg(&te->header.val, 0, 0); do { - te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK; - te_flags |= SDB_TE_ALERT_REQ_MASK; - } while (!cmpxchg_double(&te->flags, &te->overflow, - te->flags, te->overflow, - te_flags, 0ULL)); + old.val = prev.val; + new.val = prev.val; + new.f = 0; + new.a = 1; + new.overflow = 0; + prev.val = __cdsg(&te->header.val, old.val, new.val); + } while (prev.val != old.val); /* Advance to next sample-data-block */ sdbt++; @@ -1384,7 +1400,7 @@ static void aux_output_end(struct perf_output_handle *handle) range_scan = AUX_SDB_NUM_ALERT(aux); for (i = 0, idx = aux->head; i < range_scan; i++, idx++) { te = aux_sdb_trailer(aux, idx); - if (!(te->flags & SDB_TE_BUFFER_FULL_MASK)) + if (!te->header.f) break; } /* i is num of SDBs which are full */ @@ -1392,7 +1408,7 @@ static void aux_output_end(struct perf_output_handle *handle) /* 
Remove alert indicators in the buffer */ te = aux_sdb_trailer(aux, aux->alert_mark); - te->flags &= ~SDB_TE_ALERT_REQ_MASK; + te->header.a = 0; debug_sprintf_event(sfdbg, 6, "%s: SDBs %ld range %ld head %ld\n", __func__, i, range_scan, aux->head); @@ -1437,9 +1453,9 @@ static int aux_output_begin(struct perf_output_handle *handle, idx = aux->empty_mark + 1; for (i = 0; i < range_scan; i++, idx++) { te = aux_sdb_trailer(aux, idx); - te->flags &= ~(SDB_TE_BUFFER_FULL_MASK | - SDB_TE_ALERT_REQ_MASK); - te->overflow = 0; + te->header.f = 0; + te->header.a = 0; + te->header.overflow = 0; } /* Save the position of empty SDBs */ aux->empty_mark = aux->head + range - 1; @@ -1448,7 +1464,7 @@ static int aux_output_begin(struct perf_output_handle *handle, /* Set alert indicator */ aux->alert_mark = aux->head + range/2 - 1; te = aux_sdb_trailer(aux, aux->alert_mark); - te->flags = te->flags | SDB_TE_ALERT_REQ_MASK; + te->header.a = 1; /* Reset hardware buffer head */ head = AUX_SDB_INDEX(aux, aux->head); @@ -1475,14 +1491,17 @@ static int aux_output_begin(struct perf_output_handle *handle, static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, unsigned long long *overflow) { - unsigned long long orig_overflow, orig_flags, new_flags; + union hws_trailer_header old, prev, new; struct hws_trailer_entry *te; te = aux_sdb_trailer(aux, alert_index); + /* READ_ONCE() 16 byte header */ + prev.val = __cdsg(&te->header.val, 0, 0); do { - orig_flags = te->flags; - *overflow = orig_overflow = te->overflow; - if (orig_flags & SDB_TE_BUFFER_FULL_MASK) { + old.val = prev.val; + new.val = prev.val; + *overflow = old.overflow; + if (old.f) { /* * SDB is already set by hardware. 
* Abort and try to set somewhere @@ -1490,10 +1509,10 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, */ return false; } - new_flags = orig_flags | SDB_TE_ALERT_REQ_MASK; - } while (!cmpxchg_double(&te->flags, &te->overflow, - orig_flags, orig_overflow, - new_flags, 0ULL)); + new.a = 1; + new.overflow = 0; + prev.val = __cdsg(&te->header.val, old.val, new.val); + } while (prev.val != old.val); return true; } @@ -1522,8 +1541,9 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range, unsigned long long *overflow) { - unsigned long long orig_overflow, orig_flags, new_flags; unsigned long i, range_scan, idx, idx_old; + union hws_trailer_header old, prev, new; + unsigned long long orig_overflow; struct hws_trailer_entry *te; debug_sprintf_event(sfdbg, 6, "%s: range %ld head %ld alert %ld " @@ -1554,17 +1574,20 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range, idx_old = idx = aux->empty_mark + 1; for (i = 0; i < range_scan; i++, idx++) { te = aux_sdb_trailer(aux, idx); + /* READ_ONCE() 16 byte header */ + prev.val = __cdsg(&te->header.val, 0, 0); do { - orig_flags = te->flags; - orig_overflow = te->overflow; - new_flags = orig_flags & ~SDB_TE_BUFFER_FULL_MASK; + old.val = prev.val; + new.val = prev.val; + orig_overflow = old.overflow; + new.f = 0; + new.overflow = 0; if (idx == aux->alert_mark) - new_flags |= SDB_TE_ALERT_REQ_MASK; + new.a = 1; else - new_flags &= ~SDB_TE_ALERT_REQ_MASK; - } while (!cmpxchg_double(&te->flags, &te->overflow, - orig_flags, orig_overflow, - new_flags, 0ULL)); + new.a = 0; + prev.val = __cdsg(&te->header.val, old.val, new.val); + } while (prev.val != old.val); *overflow += orig_overflow; } diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 5ea3830af0cc..cbf9c1b0beda 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -17,6 
+17,8 @@ /* Handle ro_after_init data on our own. */ #define RO_AFTER_INIT_DATA +#define RUNTIME_DISCARD_EXIT + #define EMITS_PT_NOTE #include <asm-generic/vmlinux.lds.h> @@ -79,6 +81,7 @@ SECTIONS _end_amode31_refs = .; } + . = ALIGN(PAGE_SIZE); _edata = .; /* End of data section */ /* will be freed after init */ @@ -193,6 +196,7 @@ SECTIONS BSS_SECTION(PAGE_SIZE, 4 * PAGE_SIZE, PAGE_SIZE) + . = ALIGN(PAGE_SIZE); _end = . ; /* diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 1dae78deddf2..ab26aa53ee37 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -83,8 +83,9 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) struct esca_block *sca = vcpu->kvm->arch.sca; union esca_sigp_ctrl *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); - union esca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl; + union esca_sigp_ctrl new_val = {0}, old_val; + old_val = READ_ONCE(*sigp_ctrl); new_val.scn = src_id; new_val.c = 1; old_val.c = 0; @@ -95,8 +96,9 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) struct bsca_block *sca = vcpu->kvm->arch.sca; union bsca_sigp_ctrl *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); - union bsca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl; + union bsca_sigp_ctrl new_val = {0}, old_val; + old_val = READ_ONCE(*sigp_ctrl); new_val.scn = src_id; new_val.c = 1; old_val.c = 0; @@ -126,16 +128,18 @@ static void sca_clear_ext_call(struct kvm_vcpu *vcpu) struct esca_block *sca = vcpu->kvm->arch.sca; union esca_sigp_ctrl *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); - union esca_sigp_ctrl old = *sigp_ctrl; + union esca_sigp_ctrl old; + old = READ_ONCE(*sigp_ctrl); expect = old.value; rc = cmpxchg(&sigp_ctrl->value, old.value, 0); } else { struct bsca_block *sca = vcpu->kvm->arch.sca; union bsca_sigp_ctrl *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); - union bsca_sigp_ctrl old = *sigp_ctrl; + union bsca_sigp_ctrl old; + old = READ_ONCE(*sigp_ctrl); expect = 
old.value; rc = cmpxchg(&sigp_ctrl->value, old.value, 0); } diff --git a/arch/sh/include/asm/pgtable-3level.h b/arch/sh/include/asm/pgtable-3level.h index a889a3a938ba..d1ce73f3bd85 100644 --- a/arch/sh/include/asm/pgtable-3level.h +++ b/arch/sh/include/asm/pgtable-3level.h @@ -28,7 +28,7 @@ #define pmd_ERROR(e) \ printk("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e)) -typedef struct { +typedef union { struct { unsigned long pmd_low; unsigned long pmd_high; diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index a2834bc93149..3019fb1926e3 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -41,6 +41,7 @@ * MSR_CORE_C1_RES: CORE C1 Residency Counter * perf code: 0x00 * Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL,RPL + * MTL * Scope: Core (each processor core has a MSR) * MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter * perf code: 0x01 @@ -51,50 +52,50 @@ * perf code: 0x02 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, - * TGL,TNT,RKL,ADL,RPL,SPR + * TGL,TNT,RKL,ADL,RPL,SPR,MTL * Scope: Core * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter * perf code: 0x03 * Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML, - * ICL,TGL,RKL,ADL,RPL + * ICL,TGL,RKL,ADL,RPL,MTL * Scope: Core * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter. * perf code: 0x00 * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL, * KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL, - * RPL,SPR + * RPL,SPR,MTL * Scope: Package (physical package) * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. * perf code: 0x01 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL, * GLM,CNL,KBL,CML,ICL,TGL,TNT,RKL, - * ADL,RPL + * ADL,RPL,MTL * Scope: Package (physical package) * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter. 
* perf code: 0x02 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, - * TGL,TNT,RKL,ADL,RPL,SPR + * TGL,TNT,RKL,ADL,RPL,SPR,MTL * Scope: Package (physical package) * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. * perf code: 0x03 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL, - * KBL,CML,ICL,TGL,RKL,ADL,RPL + * KBL,CML,ICL,TGL,RKL,ADL,RPL,MTL * Scope: Package (physical package) * MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter. * perf code: 0x04 * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL, - * ADL,RPL + * ADL,RPL,MTL * Scope: Package (physical package) * MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter. * perf code: 0x05 * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL, - * ADL,RPL + * ADL,RPL,MTL * Scope: Package (physical package) * MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter. * perf code: 0x06 * Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL, - * TNT,RKL,ADL,RPL + * TNT,RKL,ADL,RPL,MTL * Scope: Package (physical package) * */ @@ -686,6 +687,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_cstates), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_cstates), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &adl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &adl_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 6f1ccc57a692..459b1aafd4d4 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1833,6 +1833,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &spr_uncore_init), 
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init), {}, }; diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index ecced3a52668..c65d8906cbcf 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -69,6 +69,7 @@ static bool test_intel(int idx, void *data) case INTEL_FAM6_BROADWELL_G: case INTEL_FAM6_BROADWELL_X: case INTEL_FAM6_SAPPHIRERAPIDS_X: + case INTEL_FAM6_EMERALDRAPIDS_X: case INTEL_FAM6_ATOM_SILVERMONT: case INTEL_FAM6_ATOM_SILVERMONT_D: @@ -107,6 +108,8 @@ static bool test_intel(int idx, void *data) case INTEL_FAM6_RAPTORLAKE: case INTEL_FAM6_RAPTORLAKE_P: case INTEL_FAM6_RAPTORLAKE_S: + case INTEL_FAM6_METEORLAKE: + case INTEL_FAM6_METEORLAKE_L: if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF) return true; break; diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index a829492bca4c..52e6e7ed4f78 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -800,13 +800,18 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &model_hsx), X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &model_spr), + X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &model_spr), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &model_skl), {}, }; MODULE_DEVICE_TABLE(x86cpu, rapl_model_match); diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 58db86f7b384..9bdc3b656b2c 100644 --- a/arch/x86/xen/p2m.c +++ 
b/arch/x86/xen/p2m.c @@ -134,11 +134,6 @@ static inline unsigned p2m_mid_index(unsigned long pfn) return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE; } -static inline unsigned p2m_index(unsigned long pfn) -{ - return pfn % P2M_PER_PAGE; -} - static void p2m_top_mfn_init(unsigned long *top) { unsigned i; diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index 228e4dff5fb2..a6d09fe04831 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -154,11 +154,6 @@ struct thread_struct { unsigned long ra; /* kernel's a0: return address and window call size */ unsigned long sp; /* kernel's a1: stack pointer */ - /* struct xtensa_cpuinfo info; */ - - unsigned long bad_vaddr; /* last user fault */ - unsigned long bad_uaddr; /* last kernel fault accessing user space */ - unsigned long error_code; #ifdef CONFIG_HAVE_HW_BREAKPOINT struct perf_event *ptrace_bp[XCHAL_NUM_IBREAK]; struct perf_event *ptrace_wp[XCHAL_NUM_DBREAK]; @@ -176,10 +171,6 @@ struct thread_struct { { \ ra: 0, \ sp: sizeof(init_stack) + (long) &init_stack, \ - /*info: {0}, */ \ - bad_vaddr: 0, \ - bad_uaddr: 0, \ - error_code: 0, \ } diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index 0c25e035ff10..cd98366a9b23 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -362,8 +362,6 @@ static void do_unaligned_user(struct pt_regs *regs) __die_if_kernel("Unhandled unaligned exception in kernel", regs, SIGKILL); - current->thread.bad_vaddr = regs->excvaddr; - current->thread.error_code = -3; pr_info_ratelimited("Unaligned memory access to %08lx in '%s' " "(pid = %d, pc = %#010lx)\n", regs->excvaddr, current->comm, diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index 8c781b05c0bd..faf7cf35a0ee 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -206,8 +206,6 @@ good_area: bad_area: mmap_read_unlock(mm); if (user_mode(regs)) { - current->thread.bad_vaddr = address; - 
current->thread.error_code = is_write; force_sig_fault(SIGSEGV, code, (void *) address); return; } @@ -232,7 +230,6 @@ do_sigbus: /* Send a sigbus, regardless of whether we were in kernel * or user mode. */ - current->thread.bad_vaddr = address; force_sig_fault(SIGBUS, BUS_ADRERR, (void *) address); /* Kernel mode? Handle exceptions or die */ @@ -252,7 +249,6 @@ bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) if ((entry = search_exception_tables(regs->pc)) != NULL) { pr_debug("%s: Exception at pc=%#010lx (%lx)\n", current->comm, regs->pc, entry->fixup); - current->thread.bad_uaddr = address; regs->pc = entry->fixup; return; } diff --git a/block/Kconfig b/block/Kconfig index 444c5ab3b67e..5d9d9c84d516 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -6,7 +6,6 @@ menuconfig BLOCK bool "Enable the block layer" if EXPERT default y select SBITMAP - select SRCU help Provide block layer support for the kernel. diff --git a/block/bio.c b/block/bio.c index 5f96fcae3f75..ab59a491a883 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1401,6 +1401,27 @@ void __bio_advance(struct bio *bio, unsigned bytes) } EXPORT_SYMBOL(__bio_advance); +void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, + struct bio *src, struct bvec_iter *src_iter) +{ + while (src_iter->bi_size && dst_iter->bi_size) { + struct bio_vec src_bv = bio_iter_iovec(src, *src_iter); + struct bio_vec dst_bv = bio_iter_iovec(dst, *dst_iter); + unsigned int bytes = min(src_bv.bv_len, dst_bv.bv_len); + void *src_buf = bvec_kmap_local(&src_bv); + void *dst_buf = bvec_kmap_local(&dst_bv); + + memcpy(dst_buf, src_buf, bytes); + + kunmap_local(dst_buf); + kunmap_local(src_buf); + + bio_advance_iter_single(src, src_iter, bytes); + bio_advance_iter_single(dst, dst_iter, bytes); + } +} +EXPORT_SYMBOL(bio_copy_data_iter); + /** * bio_copy_data - copy contents of data buffers from one bio to another * @src: source bio @@ -1414,21 +1435,7 @@ void bio_copy_data(struct bio *dst, struct bio *src) 
struct bvec_iter src_iter = src->bi_iter; struct bvec_iter dst_iter = dst->bi_iter; - while (src_iter.bi_size && dst_iter.bi_size) { - struct bio_vec src_bv = bio_iter_iovec(src, src_iter); - struct bio_vec dst_bv = bio_iter_iovec(dst, dst_iter); - unsigned int bytes = min(src_bv.bv_len, dst_bv.bv_len); - void *src_buf = bvec_kmap_local(&src_bv); - void *dst_buf = bvec_kmap_local(&dst_bv); - - memcpy(dst_buf, src_buf, bytes); - - kunmap_local(dst_buf); - kunmap_local(src_buf); - - bio_advance_iter_single(src, &src_iter, bytes); - bio_advance_iter_single(dst, &dst_iter, bytes); - } + bio_copy_data_iter(dst, &dst_iter, src, &src_iter); } EXPORT_SYMBOL(bio_copy_data); diff --git a/block/blk-merge.c b/block/blk-merge.c index 35a8f75cc45d..b7c193d67185 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -309,6 +309,16 @@ static struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim, *segs = nsegs; return NULL; split: + /* + * We can't sanely support splitting for a REQ_NOWAIT bio. End it + * with EAGAIN if splitting is required and return an error pointer. 
+ */ + if (bio->bi_opf & REQ_NOWAIT) { + bio->bi_status = BLK_STS_AGAIN; + bio_endio(bio); + return ERR_PTR(-EAGAIN); + } + *segs = nsegs; /* @@ -358,11 +368,13 @@ struct bio *__bio_split_to_limits(struct bio *bio, default: split = bio_split_rw(bio, lim, nr_segs, bs, get_max_io_size(bio, lim) << SECTOR_SHIFT); + if (IS_ERR(split)) + return NULL; break; } if (split) { - /* there isn't chance to merge the splitted bio */ + /* there isn't chance to merge the split bio */ split->bi_opf |= REQ_NOMERGE; blkcg_bio_issue_init(split); diff --git a/block/blk-mq.c b/block/blk-mq.c index c5cf0dbca1db..2c49b4151da1 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2951,8 +2951,11 @@ void blk_mq_submit_bio(struct bio *bio) blk_status_t ret; bio = blk_queue_bounce(bio, q); - if (bio_may_exceed_limits(bio, &q->limits)) + if (bio_may_exceed_limits(bio, &q->limits)) { bio = __bio_split_to_limits(bio, &q->limits, &nr_segs); + if (!bio) + return; + } if (!bio_integrity_prep(bio)) return; diff --git a/block/genhd.c b/block/genhd.c index ab3cbe44196f..23cf83b3331c 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1201,10 +1201,21 @@ struct class block_class = { .dev_uevent = block_uevent, }; +static char *block_devnode(struct device *dev, umode_t *mode, + kuid_t *uid, kgid_t *gid) +{ + struct gendisk *disk = dev_to_disk(dev); + + if (disk->fops->devnode) + return disk->fops->devnode(disk, mode); + return NULL; +} + const struct device_type disk_type = { .name = "disk", .groups = disk_attr_groups, .release = disk_release, + .devnode = block_devnode, }; #ifdef CONFIG_PROC_FS diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index a2184b428493..a41145d52de9 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -285,6 +285,49 @@ config BLK_DEV_RAM_SIZE The default value is 4096 kilobytes. Only change this if you know what you are doing. 
+config CDROM_PKTCDVD + tristate "Packet writing on CD/DVD media (DEPRECATED)" + depends on !UML + depends on SCSI + select CDROM + help + Note: This driver is deprecated and will be removed from the + kernel in the near future! + + If you have a CDROM/DVD drive that supports packet writing, say + Y to include support. It should work with any MMC/Mt Fuji + compliant ATAPI or SCSI drive, which is just about any newer + DVD/CD writer. + + Currently only writing to CD-RW, DVD-RW, DVD+RW and DVDRAM discs + is possible. + DVD-RW disks must be in restricted overwrite mode. + + See the file <file:Documentation/cdrom/packet-writing.rst> + for further information on the use of this driver. + + To compile this driver as a module, choose M here: the + module will be called pktcdvd. + +config CDROM_PKTCDVD_BUFFERS + int "Free buffers for data gathering" + depends on CDROM_PKTCDVD + default "8" + help + This controls the maximum number of active concurrent packets. More + concurrent packets can increase write performance, but also require + more memory. Each concurrent packet will require approximately 64Kb + of non-swappable kernel memory, memory which will be allocated when + a disc is opened for writing. + +config CDROM_PKTCDVD_WCACHE + bool "Enable write caching" + depends on CDROM_PKTCDVD + help + If enabled, write caching will be set for the CD-R/W device. For now + this option is dangerous unless the CD-RW media is known good, as we + don't do deferred write error handling yet. 
+ config ATA_OVER_ETH tristate "ATA over Ethernet support" depends on NET diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 962ee65d8ca3..101612cba303 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o obj-$(CONFIG_N64CART) += n64cart.o obj-$(CONFIG_BLK_DEV_RAM) += brd.o obj-$(CONFIG_BLK_DEV_LOOP) += loop.o +obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o obj-$(CONFIG_SUNVDC) += sunvdc.o obj-$(CONFIG_BLK_DEV_NBD) += nbd.o diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index eb14ec8ec04c..e36216d50753 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1607,6 +1607,8 @@ void drbd_submit_bio(struct bio *bio) struct drbd_device *device = bio->bi_bdev->bd_disk->private_data; bio = bio_split_to_limits(bio); + if (!bio) + return; /* * what we "blindly" assume: diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c new file mode 100644 index 000000000000..4cea3b08087e --- /dev/null +++ b/drivers/block/pktcdvd.c @@ -0,0 +1,2944 @@ +/* + * Copyright (C) 2000 Jens Axboe <axboe@suse.de> + * Copyright (C) 2001-2004 Peter Osterlund <petero2@telia.com> + * Copyright (C) 2006 Thomas Maier <balagi@justmail.de> + * + * May be copied or modified under the terms of the GNU General Public + * License. See linux/COPYING for more information. + * + * Packet writing layer for ATAPI and SCSI CD-RW, DVD+RW, DVD-RW and + * DVD-RAM devices. + * + * Theory of operation: + * + * At the lowest level, there is the standard driver for the CD/DVD device, + * such as drivers/scsi/sr.c. This driver can handle read and write requests, + * but it doesn't know anything about the special restrictions that apply to + * packet writing. One restriction is that write requests must be aligned to + * packet boundaries on the physical media, and the size of a write request + * must be equal to the packet size. 
Another restriction is that a + * GPCMD_FLUSH_CACHE command has to be issued to the drive before a read + * command, if the previous command was a write. + * + * The purpose of the packet writing driver is to hide these restrictions from + * higher layers, such as file systems, and present a block device that can be + * randomly read and written using 2kB-sized blocks. + * + * The lowest layer in the packet writing driver is the packet I/O scheduler. + * Its data is defined by the struct packet_iosched and includes two bio + * queues with pending read and write requests. These queues are processed + * by the pkt_iosched_process_queue() function. The write requests in this + * queue are already properly aligned and sized. This layer is responsible for + * issuing the flush cache commands and scheduling the I/O in a good order. + * + * The next layer transforms unaligned write requests to aligned writes. This + * transformation requires reading missing pieces of data from the underlying + * block device, assembling the pieces to full packets and queuing them to the + * packet I/O scheduler. + * + * At the top layer there is a custom ->submit_bio function that forwards + * read requests directly to the iosched queue and puts write requests in the + * unaligned write queue. A kernel thread performs the necessary read + * gathering to convert the unaligned writes to aligned writes and then feeds + * them to the packet I/O scheduler. 
+ * + *************************************************************************/ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/pktcdvd.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/compat.h> +#include <linux/kthread.h> +#include <linux/errno.h> +#include <linux/spinlock.h> +#include <linux/file.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/miscdevice.h> +#include <linux/freezer.h> +#include <linux/mutex.h> +#include <linux/slab.h> +#include <linux/backing-dev.h> +#include <scsi/scsi_cmnd.h> +#include <scsi/scsi_ioctl.h> +#include <scsi/scsi.h> +#include <linux/debugfs.h> +#include <linux/device.h> +#include <linux/nospec.h> +#include <linux/uaccess.h> + +#define DRIVER_NAME "pktcdvd" + +#define pkt_err(pd, fmt, ...) \ + pr_err("%s: " fmt, pd->name, ##__VA_ARGS__) +#define pkt_notice(pd, fmt, ...) \ + pr_notice("%s: " fmt, pd->name, ##__VA_ARGS__) +#define pkt_info(pd, fmt, ...) \ + pr_info("%s: " fmt, pd->name, ##__VA_ARGS__) + +#define pkt_dbg(level, pd, fmt, ...) 
\ +do { \ + if (level == 2 && PACKET_DEBUG >= 2) \ + pr_notice("%s: %s():" fmt, \ + pd->name, __func__, ##__VA_ARGS__); \ + else if (level == 1 && PACKET_DEBUG >= 1) \ + pr_notice("%s: " fmt, pd->name, ##__VA_ARGS__); \ +} while (0) + +#define MAX_SPEED 0xffff + +static DEFINE_MUTEX(pktcdvd_mutex); +static struct pktcdvd_device *pkt_devs[MAX_WRITERS]; +static struct proc_dir_entry *pkt_proc; +static int pktdev_major; +static int write_congestion_on = PKT_WRITE_CONGESTION_ON; +static int write_congestion_off = PKT_WRITE_CONGESTION_OFF; +static struct mutex ctl_mutex; /* Serialize open/close/setup/teardown */ +static mempool_t psd_pool; +static struct bio_set pkt_bio_set; + +static struct class *class_pktcdvd = NULL; /* /sys/class/pktcdvd */ +static struct dentry *pkt_debugfs_root = NULL; /* /sys/kernel/debug/pktcdvd */ + +/* forward declaration */ +static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev); +static int pkt_remove_dev(dev_t pkt_dev); +static int pkt_seq_show(struct seq_file *m, void *p); + +static sector_t get_zone(sector_t sector, struct pktcdvd_device *pd) +{ + return (sector + pd->offset) & ~(sector_t)(pd->settings.size - 1); +} + +/********************************************************** + * sysfs interface for pktcdvd + * by (C) 2006 Thomas Maier <balagi@justmail.de> + + /sys/class/pktcdvd/pktcdvd[0-7]/ + stat/reset + stat/packets_started + stat/packets_finished + stat/kb_written + stat/kb_read + stat/kb_read_gather + write_queue/size + write_queue/congestion_off + write_queue/congestion_on + **********************************************************/ + +static ssize_t packets_started_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%lu\n", pd->stats.pkt_started); +} +static DEVICE_ATTR_RO(packets_started); + +static ssize_t packets_finished_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pktcdvd_device *pd = 
dev_get_drvdata(dev); + + return sysfs_emit(buf, "%lu\n", pd->stats.pkt_ended); +} +static DEVICE_ATTR_RO(packets_finished); + +static ssize_t kb_written_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%lu\n", pd->stats.secs_w >> 1); +} +static DEVICE_ATTR_RO(kb_written); + +static ssize_t kb_read_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%lu\n", pd->stats.secs_r >> 1); +} +static DEVICE_ATTR_RO(kb_read); + +static ssize_t kb_read_gather_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%lu\n", pd->stats.secs_rg >> 1); +} +static DEVICE_ATTR_RO(kb_read_gather); + +static ssize_t reset_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + + if (len > 0) { + pd->stats.pkt_started = 0; + pd->stats.pkt_ended = 0; + pd->stats.secs_w = 0; + pd->stats.secs_rg = 0; + pd->stats.secs_r = 0; + } + return len; +} +static DEVICE_ATTR_WO(reset); + +static struct attribute *pkt_stat_attrs[] = { + &dev_attr_packets_finished.attr, + &dev_attr_packets_started.attr, + &dev_attr_kb_read.attr, + &dev_attr_kb_written.attr, + &dev_attr_kb_read_gather.attr, + &dev_attr_reset.attr, + NULL, +}; + +static const struct attribute_group pkt_stat_group = { + .name = "stat", + .attrs = pkt_stat_attrs, +}; + +static ssize_t size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + int n; + + spin_lock(&pd->lock); + n = sysfs_emit(buf, "%d\n", pd->bio_queue_size); + spin_unlock(&pd->lock); + return n; +} +static DEVICE_ATTR_RO(size); + +static void init_write_congestion_marks(int* lo, int* hi) +{ + if (*hi > 0) { + *hi = 
max(*hi, 500); + *hi = min(*hi, 1000000); + if (*lo <= 0) + *lo = *hi - 100; + else { + *lo = min(*lo, *hi - 100); + *lo = max(*lo, 100); + } + } else { + *hi = -1; + *lo = -1; + } +} + +static ssize_t congestion_off_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + int n; + + spin_lock(&pd->lock); + n = sysfs_emit(buf, "%d\n", pd->write_congestion_off); + spin_unlock(&pd->lock); + return n; +} + +static ssize_t congestion_off_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + int val; + + if (sscanf(buf, "%d", &val) == 1) { + spin_lock(&pd->lock); + pd->write_congestion_off = val; + init_write_congestion_marks(&pd->write_congestion_off, + &pd->write_congestion_on); + spin_unlock(&pd->lock); + } + return len; +} +static DEVICE_ATTR_RW(congestion_off); + +static ssize_t congestion_on_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + int n; + + spin_lock(&pd->lock); + n = sysfs_emit(buf, "%d\n", pd->write_congestion_on); + spin_unlock(&pd->lock); + return n; +} + +static ssize_t congestion_on_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + int val; + + if (sscanf(buf, "%d", &val) == 1) { + spin_lock(&pd->lock); + pd->write_congestion_on = val; + init_write_congestion_marks(&pd->write_congestion_off, + &pd->write_congestion_on); + spin_unlock(&pd->lock); + } + return len; +} +static DEVICE_ATTR_RW(congestion_on); + +static struct attribute *pkt_wq_attrs[] = { + &dev_attr_congestion_on.attr, + &dev_attr_congestion_off.attr, + &dev_attr_size.attr, + NULL, +}; + +static const struct attribute_group pkt_wq_group = { + .name = "write_queue", + .attrs = pkt_wq_attrs, +}; + +static const struct attribute_group *pkt_groups[] = { + 
&pkt_stat_group, + &pkt_wq_group, + NULL, +}; + +static void pkt_sysfs_dev_new(struct pktcdvd_device *pd) +{ + if (class_pktcdvd) { + pd->dev = device_create_with_groups(class_pktcdvd, NULL, + MKDEV(0, 0), pd, pkt_groups, + "%s", pd->name); + if (IS_ERR(pd->dev)) + pd->dev = NULL; + } +} + +static void pkt_sysfs_dev_remove(struct pktcdvd_device *pd) +{ + if (class_pktcdvd) + device_unregister(pd->dev); +} + + +/******************************************************************** + /sys/class/pktcdvd/ + add map block device + remove unmap packet dev + device_map show mappings + *******************************************************************/ + +static void class_pktcdvd_release(struct class *cls) +{ + kfree(cls); +} + +static ssize_t device_map_show(struct class *c, struct class_attribute *attr, + char *data) +{ + int n = 0; + int idx; + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + for (idx = 0; idx < MAX_WRITERS; idx++) { + struct pktcdvd_device *pd = pkt_devs[idx]; + if (!pd) + continue; + n += sprintf(data+n, "%s %u:%u %u:%u\n", + pd->name, + MAJOR(pd->pkt_dev), MINOR(pd->pkt_dev), + MAJOR(pd->bdev->bd_dev), + MINOR(pd->bdev->bd_dev)); + } + mutex_unlock(&ctl_mutex); + return n; +} +static CLASS_ATTR_RO(device_map); + +static ssize_t add_store(struct class *c, struct class_attribute *attr, + const char *buf, size_t count) +{ + unsigned int major, minor; + + if (sscanf(buf, "%u:%u", &major, &minor) == 2) { + /* pkt_setup_dev() expects caller to hold reference to self */ + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + pkt_setup_dev(MKDEV(major, minor), NULL); + + module_put(THIS_MODULE); + + return count; + } + + return -EINVAL; +} +static CLASS_ATTR_WO(add); + +static ssize_t remove_store(struct class *c, struct class_attribute *attr, + const char *buf, size_t count) +{ + unsigned int major, minor; + if (sscanf(buf, "%u:%u", &major, &minor) == 2) { + pkt_remove_dev(MKDEV(major, minor)); + return count; + } + return -EINVAL; +} +static 
CLASS_ATTR_WO(remove); + +static struct attribute *class_pktcdvd_attrs[] = { + &class_attr_add.attr, + &class_attr_remove.attr, + &class_attr_device_map.attr, + NULL, +}; +ATTRIBUTE_GROUPS(class_pktcdvd); + +static int pkt_sysfs_init(void) +{ + int ret = 0; + + /* + * create control files in sysfs + * /sys/class/pktcdvd/... + */ + class_pktcdvd = kzalloc(sizeof(*class_pktcdvd), GFP_KERNEL); + if (!class_pktcdvd) + return -ENOMEM; + class_pktcdvd->name = DRIVER_NAME; + class_pktcdvd->owner = THIS_MODULE; + class_pktcdvd->class_release = class_pktcdvd_release; + class_pktcdvd->class_groups = class_pktcdvd_groups; + ret = class_register(class_pktcdvd); + if (ret) { + kfree(class_pktcdvd); + class_pktcdvd = NULL; + pr_err("failed to create class pktcdvd\n"); + return ret; + } + return 0; +} + +static void pkt_sysfs_cleanup(void) +{ + if (class_pktcdvd) + class_destroy(class_pktcdvd); + class_pktcdvd = NULL; +} + +/******************************************************************** + entries in debugfs + + /sys/kernel/debug/pktcdvd[0-7]/ + info + + *******************************************************************/ + +static int pkt_debugfs_seq_show(struct seq_file *m, void *p) +{ + return pkt_seq_show(m, p); +} + +static int pkt_debugfs_fops_open(struct inode *inode, struct file *file) +{ + return single_open(file, pkt_debugfs_seq_show, inode->i_private); +} + +static const struct file_operations debug_fops = { + .open = pkt_debugfs_fops_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .owner = THIS_MODULE, +}; + +static void pkt_debugfs_dev_new(struct pktcdvd_device *pd) +{ + if (!pkt_debugfs_root) + return; + pd->dfs_d_root = debugfs_create_dir(pd->name, pkt_debugfs_root); + if (!pd->dfs_d_root) + return; + + pd->dfs_f_info = debugfs_create_file("info", 0444, + pd->dfs_d_root, pd, &debug_fops); +} + +static void pkt_debugfs_dev_remove(struct pktcdvd_device *pd) +{ + if (!pkt_debugfs_root) + return; + debugfs_remove(pd->dfs_f_info); + 
debugfs_remove(pd->dfs_d_root); + pd->dfs_f_info = NULL; + pd->dfs_d_root = NULL; +} + +static void pkt_debugfs_init(void) +{ + pkt_debugfs_root = debugfs_create_dir(DRIVER_NAME, NULL); +} + +static void pkt_debugfs_cleanup(void) +{ + debugfs_remove(pkt_debugfs_root); + pkt_debugfs_root = NULL; +} + +/* ----------------------------------------------------------*/ + + +static void pkt_bio_finished(struct pktcdvd_device *pd) +{ + BUG_ON(atomic_read(&pd->cdrw.pending_bios) <= 0); + if (atomic_dec_and_test(&pd->cdrw.pending_bios)) { + pkt_dbg(2, pd, "queue empty\n"); + atomic_set(&pd->iosched.attention, 1); + wake_up(&pd->wqueue); + } +} + +/* + * Allocate a packet_data struct + */ +static struct packet_data *pkt_alloc_packet_data(int frames) +{ + int i; + struct packet_data *pkt; + + pkt = kzalloc(sizeof(struct packet_data), GFP_KERNEL); + if (!pkt) + goto no_pkt; + + pkt->frames = frames; + pkt->w_bio = bio_kmalloc(frames, GFP_KERNEL); + if (!pkt->w_bio) + goto no_bio; + + for (i = 0; i < frames / FRAMES_PER_PAGE; i++) { + pkt->pages[i] = alloc_page(GFP_KERNEL|__GFP_ZERO); + if (!pkt->pages[i]) + goto no_page; + } + + spin_lock_init(&pkt->lock); + bio_list_init(&pkt->orig_bios); + + for (i = 0; i < frames; i++) { + pkt->r_bios[i] = bio_kmalloc(1, GFP_KERNEL); + if (!pkt->r_bios[i]) + goto no_rd_bio; + } + + return pkt; + +no_rd_bio: + for (i = 0; i < frames; i++) + kfree(pkt->r_bios[i]); +no_page: + for (i = 0; i < frames / FRAMES_PER_PAGE; i++) + if (pkt->pages[i]) + __free_page(pkt->pages[i]); + kfree(pkt->w_bio); +no_bio: + kfree(pkt); +no_pkt: + return NULL; +} + +/* + * Free a packet_data struct + */ +static void pkt_free_packet_data(struct packet_data *pkt) +{ + int i; + + for (i = 0; i < pkt->frames; i++) + kfree(pkt->r_bios[i]); + for (i = 0; i < pkt->frames / FRAMES_PER_PAGE; i++) + __free_page(pkt->pages[i]); + kfree(pkt->w_bio); + kfree(pkt); +} + +static void pkt_shrink_pktlist(struct pktcdvd_device *pd) +{ + struct packet_data *pkt, *next; + + 
BUG_ON(!list_empty(&pd->cdrw.pkt_active_list)); + + list_for_each_entry_safe(pkt, next, &pd->cdrw.pkt_free_list, list) { + pkt_free_packet_data(pkt); + } + INIT_LIST_HEAD(&pd->cdrw.pkt_free_list); +} + +static int pkt_grow_pktlist(struct pktcdvd_device *pd, int nr_packets) +{ + struct packet_data *pkt; + + BUG_ON(!list_empty(&pd->cdrw.pkt_free_list)); + + while (nr_packets > 0) { + pkt = pkt_alloc_packet_data(pd->settings.size >> 2); + if (!pkt) { + pkt_shrink_pktlist(pd); + return 0; + } + pkt->id = nr_packets; + pkt->pd = pd; + list_add(&pkt->list, &pd->cdrw.pkt_free_list); + nr_packets--; + } + return 1; +} + +static inline struct pkt_rb_node *pkt_rbtree_next(struct pkt_rb_node *node) +{ + struct rb_node *n = rb_next(&node->rb_node); + if (!n) + return NULL; + return rb_entry(n, struct pkt_rb_node, rb_node); +} + +static void pkt_rbtree_erase(struct pktcdvd_device *pd, struct pkt_rb_node *node) +{ + rb_erase(&node->rb_node, &pd->bio_queue); + mempool_free(node, &pd->rb_pool); + pd->bio_queue_size--; + BUG_ON(pd->bio_queue_size < 0); +} + +/* + * Find the first node in the pd->bio_queue rb tree with a starting sector >= s. + */ +static struct pkt_rb_node *pkt_rbtree_find(struct pktcdvd_device *pd, sector_t s) +{ + struct rb_node *n = pd->bio_queue.rb_node; + struct rb_node *next; + struct pkt_rb_node *tmp; + + if (!n) { + BUG_ON(pd->bio_queue_size > 0); + return NULL; + } + + for (;;) { + tmp = rb_entry(n, struct pkt_rb_node, rb_node); + if (s <= tmp->bio->bi_iter.bi_sector) + next = n->rb_left; + else + next = n->rb_right; + if (!next) + break; + n = next; + } + + if (s > tmp->bio->bi_iter.bi_sector) { + tmp = pkt_rbtree_next(tmp); + if (!tmp) + return NULL; + } + BUG_ON(s > tmp->bio->bi_iter.bi_sector); + return tmp; +} + +/* + * Insert a node into the pd->bio_queue rb tree. 
+ */ +static void pkt_rbtree_insert(struct pktcdvd_device *pd, struct pkt_rb_node *node) +{ + struct rb_node **p = &pd->bio_queue.rb_node; + struct rb_node *parent = NULL; + sector_t s = node->bio->bi_iter.bi_sector; + struct pkt_rb_node *tmp; + + while (*p) { + parent = *p; + tmp = rb_entry(parent, struct pkt_rb_node, rb_node); + if (s < tmp->bio->bi_iter.bi_sector) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + rb_link_node(&node->rb_node, parent, p); + rb_insert_color(&node->rb_node, &pd->bio_queue); + pd->bio_queue_size++; +} + +/* + * Send a packet_command to the underlying block device and + * wait for completion. + */ +static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *cgc) +{ + struct request_queue *q = bdev_get_queue(pd->bdev); + struct scsi_cmnd *scmd; + struct request *rq; + int ret = 0; + + rq = scsi_alloc_request(q, (cgc->data_direction == CGC_DATA_WRITE) ? + REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0); + if (IS_ERR(rq)) + return PTR_ERR(rq); + scmd = blk_mq_rq_to_pdu(rq); + + if (cgc->buflen) { + ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen, + GFP_NOIO); + if (ret) + goto out; + } + + scmd->cmd_len = COMMAND_SIZE(cgc->cmd[0]); + memcpy(scmd->cmnd, cgc->cmd, CDROM_PACKET_SIZE); + + rq->timeout = 60*HZ; + if (cgc->quiet) + rq->rq_flags |= RQF_QUIET; + + blk_execute_rq(rq, false); + if (scmd->result) + ret = -EIO; +out: + blk_mq_free_request(rq); + return ret; +} + +static const char *sense_key_string(__u8 index) +{ + static const char * const info[] = { + "No sense", "Recovered error", "Not ready", + "Medium error", "Hardware error", "Illegal request", + "Unit attention", "Data protect", "Blank check", + }; + + return index < ARRAY_SIZE(info) ? info[index] : "INVALID"; +} + +/* + * A generic sense dump / resolve mechanism should be implemented across + * all ATAPI + SCSI devices. 
+ */
+static void pkt_dump_sense(struct pktcdvd_device *pd,
+			   struct packet_command *cgc)
+{
+	struct scsi_sense_hdr *sshdr = cgc->sshdr;
+
+	if (sshdr)	/* log the command bytes together with key/asc/ascq */
+		pkt_err(pd, "%*ph - sense %02x.%02x.%02x (%s)\n",
+			CDROM_PACKET_SIZE, cgc->cmd,
+			sshdr->sense_key, sshdr->asc, sshdr->ascq,
+			sense_key_string(sshdr->sense_key));
+	else		/* no sense header attached: log the command bytes only */
+		pkt_err(pd, "%*ph - no sense\n", CDROM_PACKET_SIZE, cgc->cmd);
+}
+
+/*
+ * flush the drive cache to media
+ *
+ * Sends GPCMD_FLUSH_CACHE as a quiet command without a data buffer and
+ * returns the result of pkt_generic_packet().
+ */
+static int pkt_flush_cache(struct pktcdvd_device *pd)
+{
+	struct packet_command cgc;
+
+	init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
+	cgc.cmd[0] = GPCMD_FLUSH_CACHE;
+	cgc.quiet = 1;
+
+	/*
+	 * the IMMED bit -- we default to not setting it, although that
+	 * would allow a much faster close, this is safer
+	 */
+#if 0
+	cgc.cmd[1] = 1 << 1;
+#endif
+	return pkt_generic_packet(pd, &cgc);
+}
+
+/*
+ * speed is given as the normal factor, e.g. 4 for 4x
+ *
+ * Builds a GPCMD_SET_SPEED command with @read_speed in command bytes 2-3
+ * and @write_speed in bytes 4-5, most significant byte first.  On failure
+ * the command is logged through pkt_dump_sense() and the error from
+ * pkt_generic_packet() is returned.
+ *
+ * NOTE(review): the values are copied verbatim into the 16-bit speed
+ * fields; confirm that callers really pass an "Nx" factor and not a raw
+ * transfer rate.
+ */
+static noinline_for_stack int pkt_set_speed(struct pktcdvd_device *pd,
+				unsigned write_speed, unsigned read_speed)
+{
+	struct packet_command cgc;
+	struct scsi_sense_hdr sshdr;
+	int ret;
+
+	init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
+	cgc.sshdr = &sshdr;
+	cgc.cmd[0] = GPCMD_SET_SPEED;
+	cgc.cmd[2] = (read_speed >> 8) & 0xff;
+	cgc.cmd[3] = read_speed & 0xff;
+	cgc.cmd[4] = (write_speed >> 8) & 0xff;
+	cgc.cmd[5] = write_speed & 0xff;
+
+	ret = pkt_generic_packet(pd, &cgc);
+	if (ret)
+		pkt_dump_sense(pd, &cgc);
+
+	return ret;
+}
+
+/*
+ * Queue a bio for processing by the low-level CD device. Must be called
+ * from process context.
+ *
+ * Reads are appended to iosched.read_queue, everything else to
+ * iosched.write_queue, under iosched.lock.  Afterwards the attention flag
+ * is set and pd->wqueue is woken.
+ */
+static void pkt_queue_bio(struct pktcdvd_device *pd, struct bio *bio)
+{
+	spin_lock(&pd->iosched.lock);
+	if (bio_data_dir(bio) == READ)
+		bio_list_add(&pd->iosched.read_queue, bio);
+	else
+		bio_list_add(&pd->iosched.write_queue, bio);
+	spin_unlock(&pd->iosched.lock);
+
+	atomic_set(&pd->iosched.attention, 1);
+	wake_up(&pd->wqueue);
+}
+
+/*
+ * Process the queued read/write requests.
This function handles special
+ * requirements for CDRW drives:
+ * - A cache flush command must be inserted before a read request if the
+ *   previous request was a write.
+ * - Switching between reading and writing is slow, so don't do it more often
+ *   than necessary.
+ * - Optimize for throughput at the expense of latency. This means that streaming
+ *   writes will never be interrupted by a read, but if the drive has to seek
+ *   before the next write, switch to reading instead if there are any pending
+ *   read requests.
+ * - Set the read speed according to current usage pattern. When only reading
+ *   from the device, it's best to use the highest possible read speed, but
+ *   when switching often between reading and writing, it's better to have the
+ *   same read and write speeds.
+ *
+ * Does nothing unless iosched.attention is set; the flag is cleared before
+ * the queues are drained.
+ */
+static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
+{
+
+	if (atomic_read(&pd->iosched.attention) == 0)
+		return;
+	atomic_set(&pd->iosched.attention, 0);
+
+	for (;;) {
+		struct bio *bio;
+		int reads_queued, writes_queued;
+
+		spin_lock(&pd->iosched.lock);
+		reads_queued = !bio_list_empty(&pd->iosched.read_queue);
+		writes_queued = !bio_list_empty(&pd->iosched.write_queue);
+		spin_unlock(&pd->iosched.lock);
+
+		if (!reads_queued && !writes_queued)
+			break;	/* both queues are drained */
+
+		if (pd->iosched.writing) {
+			int need_write_seek = 1;
+			spin_lock(&pd->iosched.lock);
+			bio = bio_list_peek(&pd->iosched.write_queue);
+			spin_unlock(&pd->iosched.lock);
+			if (bio && (bio->bi_iter.bi_sector ==
+				    pd->iosched.last_write))
+				need_write_seek = 0;	/* next write starts where the last one ended */
+			if (need_write_seek && reads_queued) {
+				if (atomic_read(&pd->cdrw.pending_bios) > 0) {
+					pkt_dbg(2, pd, "write, waiting\n");
+					break;	/* let in-flight bios finish before switching */
+				}
+				pkt_flush_cache(pd);	/* flush before switching to reads */
+				pd->iosched.writing = 0;
+			}
+		} else {
+			if (!reads_queued && writes_queued) {
+				if (atomic_read(&pd->cdrw.pending_bios) > 0) {
+					pkt_dbg(2, pd, "read, waiting\n");
+					break;	/* let in-flight bios finish before switching */
+				}
+				pd->iosched.writing = 1;
+			}
+		}
+
+		spin_lock(&pd->iosched.lock);
+		if (pd->iosched.writing)
+			bio = bio_list_pop(&pd->iosched.write_queue);
+		else
+			bio = bio_list_pop(&pd->iosched.read_queue);
+		spin_unlock(&pd->iosched.lock);
+
+		if (!bio)
+			continue;	/* selected queue was empty; re-evaluate */
+
+		if (bio_data_dir(bio) == READ)
+			pd->iosched.successive_reads +=
+				bio->bi_iter.bi_size >> 10;	/* size divided by 1024 */
+		else {
+			pd->iosched.successive_reads = 0;
+			pd->iosched.last_write = bio_end_sector(bio);
+		}
+		if (pd->iosched.successive_reads >= HI_SPEED_SWITCH) {
+			if (pd->read_speed == pd->write_speed) {
+				pd->read_speed = MAX_SPEED;
+				pkt_set_speed(pd, pd->write_speed, pd->read_speed);
+			}
+		} else {
+			if (pd->read_speed != pd->write_speed) {
+				pd->read_speed = pd->write_speed;
+				pkt_set_speed(pd, pd->write_speed, pd->read_speed);
+			}
+		}
+
+		atomic_inc(&pd->cdrw.pending_bios);	/* dropped again by pkt_bio_finished() */
+		submit_bio_noacct(bio);
+	}
+}
+
+/*
+ * Special care is needed if the underlying block device has a small
+ * max_phys_segments value (queried here through queue_max_segments()).
+ *
+ * Returns 0 after clearing or setting PACKET_MERGE_SEGS in pd->flags, or
+ * -EIO when not even one segment per page fits within the queue limit.
+ */
+static int pkt_set_segment_merging(struct pktcdvd_device *pd, struct request_queue *q)
+{
+	if ((pd->settings.size << 9) / CD_FRAMESIZE
+	    <= queue_max_segments(q)) {
+		/*
+		 * The cdrom device can handle one segment/frame
+		 */
+		clear_bit(PACKET_MERGE_SEGS, &pd->flags);
+		return 0;
+	} else if ((pd->settings.size << 9) / PAGE_SIZE
+		   <= queue_max_segments(q)) {
+		/*
+		 * We can handle this case at the expense of some extra memory
+		 * copies during write operations
+		 */
+		set_bit(PACKET_MERGE_SEGS, &pd->flags);
+		return 0;
+	} else {
+		pkt_err(pd, "cdrom max_phys_segments too small\n");
+		return -EIO;
+	}
+}
+/*
+ * bi_end_io handler for the per-frame read bios set up by pkt_gather_data().
+ * Counts a failed read in pkt->io_errors and, once the last outstanding
+ * read of the packet has completed, requests a state machine run and wakes
+ * pd->wqueue.
+ */
+static void pkt_end_io_read(struct bio *bio)
+{
+	struct packet_data *pkt = bio->bi_private;
+	struct pktcdvd_device *pd = pkt->pd;
+	BUG_ON(!pd);
+
+	pkt_dbg(2, pd, "bio=%p sec0=%llx sec=%llx err=%d\n",
+		bio, (unsigned long long)pkt->sector,
+		(unsigned long long)bio->bi_iter.bi_sector, bio->bi_status);
+
+	if (bio->bi_status)
+		atomic_inc(&pkt->io_errors);
+	bio_uninit(bio);
+	if (atomic_dec_and_test(&pkt->io_wait)) {
+		atomic_inc(&pkt->run_sm);
+		wake_up(&pd->wqueue);
+	}
+	pkt_bio_finished(pd);
+}
+
+static void pkt_end_io_packet_write(struct bio *bio) +{ + struct packet_data *pkt = bio->bi_private; + struct pktcdvd_device *pd = pkt->pd; + BUG_ON(!pd); + + pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, bio->bi_status); + + pd->stats.pkt_ended++; + + bio_uninit(bio); + pkt_bio_finished(pd); + atomic_dec(&pkt->io_wait); + atomic_inc(&pkt->run_sm); + wake_up(&pd->wqueue); +} + +/* + * Schedule reads for the holes in a packet + */ +static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt) +{ + int frames_read = 0; + struct bio *bio; + int f; + char written[PACKET_MAX_SIZE]; + + BUG_ON(bio_list_empty(&pkt->orig_bios)); + + atomic_set(&pkt->io_wait, 0); + atomic_set(&pkt->io_errors, 0); + + /* + * Figure out which frames we need to read before we can write. + */ + memset(written, 0, sizeof(written)); + spin_lock(&pkt->lock); + bio_list_for_each(bio, &pkt->orig_bios) { + int first_frame = (bio->bi_iter.bi_sector - pkt->sector) / + (CD_FRAMESIZE >> 9); + int num_frames = bio->bi_iter.bi_size / CD_FRAMESIZE; + pd->stats.secs_w += num_frames * (CD_FRAMESIZE >> 9); + BUG_ON(first_frame < 0); + BUG_ON(first_frame + num_frames > pkt->frames); + for (f = first_frame; f < first_frame + num_frames; f++) + written[f] = 1; + } + spin_unlock(&pkt->lock); + + if (pkt->cache_valid) { + pkt_dbg(2, pd, "zone %llx cached\n", + (unsigned long long)pkt->sector); + goto out_account; + } + + /* + * Schedule reads for missing parts of the packet. 
+ */ + for (f = 0; f < pkt->frames; f++) { + int p, offset; + + if (written[f]) + continue; + + bio = pkt->r_bios[f]; + bio_init(bio, pd->bdev, bio->bi_inline_vecs, 1, REQ_OP_READ); + bio->bi_iter.bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9); + bio->bi_end_io = pkt_end_io_read; + bio->bi_private = pkt; + + p = (f * CD_FRAMESIZE) / PAGE_SIZE; + offset = (f * CD_FRAMESIZE) % PAGE_SIZE; + pkt_dbg(2, pd, "Adding frame %d, page:%p offs:%d\n", + f, pkt->pages[p], offset); + if (!bio_add_page(bio, pkt->pages[p], CD_FRAMESIZE, offset)) + BUG(); + + atomic_inc(&pkt->io_wait); + pkt_queue_bio(pd, bio); + frames_read++; + } + +out_account: + pkt_dbg(2, pd, "need %d frames for zone %llx\n", + frames_read, (unsigned long long)pkt->sector); + pd->stats.pkt_started++; + pd->stats.secs_rg += frames_read * (CD_FRAMESIZE >> 9); +} + +/* + * Find a packet matching zone, or the least recently used packet if + * there is no match. + */ +static struct packet_data *pkt_get_packet_data(struct pktcdvd_device *pd, int zone) +{ + struct packet_data *pkt; + + list_for_each_entry(pkt, &pd->cdrw.pkt_free_list, list) { + if (pkt->sector == zone || pkt->list.next == &pd->cdrw.pkt_free_list) { + list_del_init(&pkt->list); + if (pkt->sector != zone) + pkt->cache_valid = 0; + return pkt; + } + } + BUG(); + return NULL; +} + +static void pkt_put_packet_data(struct pktcdvd_device *pd, struct packet_data *pkt) +{ + if (pkt->cache_valid) { + list_add(&pkt->list, &pd->cdrw.pkt_free_list); + } else { + list_add_tail(&pkt->list, &pd->cdrw.pkt_free_list); + } +} + +static inline void pkt_set_state(struct packet_data *pkt, enum packet_data_state state) +{ +#if PACKET_DEBUG > 1 + static const char *state_name[] = { + "IDLE", "WAITING", "READ_WAIT", "WRITE_WAIT", "RECOVERY", "FINISHED" + }; + enum packet_data_state old_state = pkt->state; + pkt_dbg(2, pd, "pkt %2d : s=%6llx %s -> %s\n", + pkt->id, (unsigned long long)pkt->sector, + state_name[old_state], state_name[state]); +#endif + pkt->state = state; 
+} + +/* + * Scan the work queue to see if we can start a new packet. + * returns non-zero if any work was done. + */ +static int pkt_handle_queue(struct pktcdvd_device *pd) +{ + struct packet_data *pkt, *p; + struct bio *bio = NULL; + sector_t zone = 0; /* Suppress gcc warning */ + struct pkt_rb_node *node, *first_node; + struct rb_node *n; + + atomic_set(&pd->scan_queue, 0); + + if (list_empty(&pd->cdrw.pkt_free_list)) { + pkt_dbg(2, pd, "no pkt\n"); + return 0; + } + + /* + * Try to find a zone we are not already working on. + */ + spin_lock(&pd->lock); + first_node = pkt_rbtree_find(pd, pd->current_sector); + if (!first_node) { + n = rb_first(&pd->bio_queue); + if (n) + first_node = rb_entry(n, struct pkt_rb_node, rb_node); + } + node = first_node; + while (node) { + bio = node->bio; + zone = get_zone(bio->bi_iter.bi_sector, pd); + list_for_each_entry(p, &pd->cdrw.pkt_active_list, list) { + if (p->sector == zone) { + bio = NULL; + goto try_next_bio; + } + } + break; +try_next_bio: + node = pkt_rbtree_next(node); + if (!node) { + n = rb_first(&pd->bio_queue); + if (n) + node = rb_entry(n, struct pkt_rb_node, rb_node); + } + if (node == first_node) + node = NULL; + } + spin_unlock(&pd->lock); + if (!bio) { + pkt_dbg(2, pd, "no bio\n"); + return 0; + } + + pkt = pkt_get_packet_data(pd, zone); + + pd->current_sector = zone + pd->settings.size; + pkt->sector = zone; + BUG_ON(pkt->frames != pd->settings.size >> 2); + pkt->write_size = 0; + + /* + * Scan work queue for bios in the same zone and link them + * to this packet. 
+ */ + spin_lock(&pd->lock); + pkt_dbg(2, pd, "looking for zone %llx\n", (unsigned long long)zone); + while ((node = pkt_rbtree_find(pd, zone)) != NULL) { + bio = node->bio; + pkt_dbg(2, pd, "found zone=%llx\n", (unsigned long long) + get_zone(bio->bi_iter.bi_sector, pd)); + if (get_zone(bio->bi_iter.bi_sector, pd) != zone) + break; + pkt_rbtree_erase(pd, node); + spin_lock(&pkt->lock); + bio_list_add(&pkt->orig_bios, bio); + pkt->write_size += bio->bi_iter.bi_size / CD_FRAMESIZE; + spin_unlock(&pkt->lock); + } + /* check write congestion marks, and if bio_queue_size is + * below, wake up any waiters + */ + if (pd->congested && + pd->bio_queue_size <= pd->write_congestion_off) { + pd->congested = false; + wake_up_var(&pd->congested); + } + spin_unlock(&pd->lock); + + pkt->sleep_time = max(PACKET_WAIT_TIME, 1); + pkt_set_state(pkt, PACKET_WAITING_STATE); + atomic_set(&pkt->run_sm, 1); + + spin_lock(&pd->cdrw.active_list_lock); + list_add(&pkt->list, &pd->cdrw.pkt_active_list); + spin_unlock(&pd->cdrw.active_list_lock); + + return 1; +} + +/** + * bio_list_copy_data - copy contents of data buffers from one chain of bios to + * another + * @src: source bio list + * @dst: destination bio list + * + * Stops when it reaches the end of either the @src list or @dst list - that is, + * copies min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of + * bios). + */ +static void bio_list_copy_data(struct bio *dst, struct bio *src) +{ + struct bvec_iter src_iter = src->bi_iter; + struct bvec_iter dst_iter = dst->bi_iter; + + while (1) { + if (!src_iter.bi_size) { + src = src->bi_next; + if (!src) + break; + + src_iter = src->bi_iter; + } + + if (!dst_iter.bi_size) { + dst = dst->bi_next; + if (!dst) + break; + + dst_iter = dst->bi_iter; + } + + bio_copy_data_iter(dst, &dst_iter, src, &src_iter); + } +} + +/* + * Assemble a bio to write one packet and queue the bio for processing + * by the underlying block device. 
+ */ +static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt) +{ + int f; + + bio_init(pkt->w_bio, pd->bdev, pkt->w_bio->bi_inline_vecs, pkt->frames, + REQ_OP_WRITE); + pkt->w_bio->bi_iter.bi_sector = pkt->sector; + pkt->w_bio->bi_end_io = pkt_end_io_packet_write; + pkt->w_bio->bi_private = pkt; + + /* XXX: locking? */ + for (f = 0; f < pkt->frames; f++) { + struct page *page = pkt->pages[(f * CD_FRAMESIZE) / PAGE_SIZE]; + unsigned offset = (f * CD_FRAMESIZE) % PAGE_SIZE; + + if (!bio_add_page(pkt->w_bio, page, CD_FRAMESIZE, offset)) + BUG(); + } + pkt_dbg(2, pd, "vcnt=%d\n", pkt->w_bio->bi_vcnt); + + /* + * Fill-in bvec with data from orig_bios. + */ + spin_lock(&pkt->lock); + bio_list_copy_data(pkt->w_bio, pkt->orig_bios.head); + + pkt_set_state(pkt, PACKET_WRITE_WAIT_STATE); + spin_unlock(&pkt->lock); + + pkt_dbg(2, pd, "Writing %d frames for zone %llx\n", + pkt->write_size, (unsigned long long)pkt->sector); + + if (test_bit(PACKET_MERGE_SEGS, &pd->flags) || (pkt->write_size < pkt->frames)) + pkt->cache_valid = 1; + else + pkt->cache_valid = 0; + + /* Start the write request */ + atomic_set(&pkt->io_wait, 1); + pkt_queue_bio(pd, pkt->w_bio); +} + +static void pkt_finish_packet(struct packet_data *pkt, blk_status_t status) +{ + struct bio *bio; + + if (status) + pkt->cache_valid = 0; + + /* Finish all bios corresponding to this packet */ + while ((bio = bio_list_pop(&pkt->orig_bios))) { + bio->bi_status = status; + bio_endio(bio); + } +} + +static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data *pkt) +{ + pkt_dbg(2, pd, "pkt %d\n", pkt->id); + + for (;;) { + switch (pkt->state) { + case PACKET_WAITING_STATE: + if ((pkt->write_size < pkt->frames) && (pkt->sleep_time > 0)) + return; + + pkt->sleep_time = 0; + pkt_gather_data(pd, pkt); + pkt_set_state(pkt, PACKET_READ_WAIT_STATE); + break; + + case PACKET_READ_WAIT_STATE: + if (atomic_read(&pkt->io_wait) > 0) + return; + + if (atomic_read(&pkt->io_errors) > 0) { + 
pkt_set_state(pkt, PACKET_RECOVERY_STATE); + } else { + pkt_start_write(pd, pkt); + } + break; + + case PACKET_WRITE_WAIT_STATE: + if (atomic_read(&pkt->io_wait) > 0) + return; + + if (!pkt->w_bio->bi_status) { + pkt_set_state(pkt, PACKET_FINISHED_STATE); + } else { + pkt_set_state(pkt, PACKET_RECOVERY_STATE); + } + break; + + case PACKET_RECOVERY_STATE: + pkt_dbg(2, pd, "No recovery possible\n"); + pkt_set_state(pkt, PACKET_FINISHED_STATE); + break; + + case PACKET_FINISHED_STATE: + pkt_finish_packet(pkt, pkt->w_bio->bi_status); + return; + + default: + BUG(); + break; + } + } +} + +static void pkt_handle_packets(struct pktcdvd_device *pd) +{ + struct packet_data *pkt, *next; + + /* + * Run state machine for active packets + */ + list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) { + if (atomic_read(&pkt->run_sm) > 0) { + atomic_set(&pkt->run_sm, 0); + pkt_run_state_machine(pd, pkt); + } + } + + /* + * Move no longer active packets to the free list + */ + spin_lock(&pd->cdrw.active_list_lock); + list_for_each_entry_safe(pkt, next, &pd->cdrw.pkt_active_list, list) { + if (pkt->state == PACKET_FINISHED_STATE) { + list_del(&pkt->list); + pkt_put_packet_data(pd, pkt); + pkt_set_state(pkt, PACKET_IDLE_STATE); + atomic_set(&pd->scan_queue, 1); + } + } + spin_unlock(&pd->cdrw.active_list_lock); +} + +static void pkt_count_states(struct pktcdvd_device *pd, int *states) +{ + struct packet_data *pkt; + int i; + + for (i = 0; i < PACKET_NUM_STATES; i++) + states[i] = 0; + + spin_lock(&pd->cdrw.active_list_lock); + list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) { + states[pkt->state]++; + } + spin_unlock(&pd->cdrw.active_list_lock); +} + +/* + * kcdrwd is woken up when writes have been queued for one of our + * registered devices + */ +static int kcdrwd(void *foobar) +{ + struct pktcdvd_device *pd = foobar; + struct packet_data *pkt; + long min_sleep_time, residue; + + set_user_nice(current, MIN_NICE); + set_freezable(); + + for (;;) { + 
DECLARE_WAITQUEUE(wait, current); + + /* + * Wait until there is something to do + */ + add_wait_queue(&pd->wqueue, &wait); + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + + /* Check if we need to run pkt_handle_queue */ + if (atomic_read(&pd->scan_queue) > 0) + goto work_to_do; + + /* Check if we need to run the state machine for some packet */ + list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) { + if (atomic_read(&pkt->run_sm) > 0) + goto work_to_do; + } + + /* Check if we need to process the iosched queues */ + if (atomic_read(&pd->iosched.attention) != 0) + goto work_to_do; + + /* Otherwise, go to sleep */ + if (PACKET_DEBUG > 1) { + int states[PACKET_NUM_STATES]; + pkt_count_states(pd, states); + pkt_dbg(2, pd, "i:%d ow:%d rw:%d ww:%d rec:%d fin:%d\n", + states[0], states[1], states[2], + states[3], states[4], states[5]); + } + + min_sleep_time = MAX_SCHEDULE_TIMEOUT; + list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) { + if (pkt->sleep_time && pkt->sleep_time < min_sleep_time) + min_sleep_time = pkt->sleep_time; + } + + pkt_dbg(2, pd, "sleeping\n"); + residue = schedule_timeout(min_sleep_time); + pkt_dbg(2, pd, "wake up\n"); + + /* make swsusp happy with our thread */ + try_to_freeze(); + + list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) { + if (!pkt->sleep_time) + continue; + pkt->sleep_time -= min_sleep_time - residue; + if (pkt->sleep_time <= 0) { + pkt->sleep_time = 0; + atomic_inc(&pkt->run_sm); + } + } + + if (kthread_should_stop()) + break; + } +work_to_do: + set_current_state(TASK_RUNNING); + remove_wait_queue(&pd->wqueue, &wait); + + if (kthread_should_stop()) + break; + + /* + * if pkt_handle_queue returns true, we can queue + * another request. 
+ */ + while (pkt_handle_queue(pd)) + ; + + /* + * Handle packet state machine + */ + pkt_handle_packets(pd); + + /* + * Handle iosched queues + */ + pkt_iosched_process_queue(pd); + } + + return 0; +} + +static void pkt_print_settings(struct pktcdvd_device *pd) +{ + pkt_info(pd, "%s packets, %u blocks, Mode-%c disc\n", + pd->settings.fp ? "Fixed" : "Variable", + pd->settings.size >> 2, + pd->settings.block_mode == 8 ? '1' : '2'); +} + +static int pkt_mode_sense(struct pktcdvd_device *pd, struct packet_command *cgc, int page_code, int page_control) +{ + memset(cgc->cmd, 0, sizeof(cgc->cmd)); + + cgc->cmd[0] = GPCMD_MODE_SENSE_10; + cgc->cmd[2] = page_code | (page_control << 6); + cgc->cmd[7] = cgc->buflen >> 8; + cgc->cmd[8] = cgc->buflen & 0xff; + cgc->data_direction = CGC_DATA_READ; + return pkt_generic_packet(pd, cgc); +} + +static int pkt_mode_select(struct pktcdvd_device *pd, struct packet_command *cgc) +{ + memset(cgc->cmd, 0, sizeof(cgc->cmd)); + memset(cgc->buffer, 0, 2); + cgc->cmd[0] = GPCMD_MODE_SELECT_10; + cgc->cmd[1] = 0x10; /* PF */ + cgc->cmd[7] = cgc->buflen >> 8; + cgc->cmd[8] = cgc->buflen & 0xff; + cgc->data_direction = CGC_DATA_WRITE; + return pkt_generic_packet(pd, cgc); +} + +static int pkt_get_disc_info(struct pktcdvd_device *pd, disc_information *di) +{ + struct packet_command cgc; + int ret; + + /* set up command and get the disc info */ + init_cdrom_command(&cgc, di, sizeof(*di), CGC_DATA_READ); + cgc.cmd[0] = GPCMD_READ_DISC_INFO; + cgc.cmd[8] = cgc.buflen = 2; + cgc.quiet = 1; + + ret = pkt_generic_packet(pd, &cgc); + if (ret) + return ret; + + /* not all drives have the same disc_info length, so requeue + * packet with the length the drive tells us it can supply + */ + cgc.buflen = be16_to_cpu(di->disc_information_length) + + sizeof(di->disc_information_length); + + if (cgc.buflen > sizeof(disc_information)) + cgc.buflen = sizeof(disc_information); + + cgc.cmd[8] = cgc.buflen; + return pkt_generic_packet(pd, &cgc); +} + +static int 
pkt_get_track_info(struct pktcdvd_device *pd, __u16 track, __u8 type, track_information *ti) +{ + struct packet_command cgc; + int ret; + + init_cdrom_command(&cgc, ti, 8, CGC_DATA_READ); + cgc.cmd[0] = GPCMD_READ_TRACK_RZONE_INFO; + cgc.cmd[1] = type & 3; + cgc.cmd[4] = (track & 0xff00) >> 8; + cgc.cmd[5] = track & 0xff; + cgc.cmd[8] = 8; + cgc.quiet = 1; + + ret = pkt_generic_packet(pd, &cgc); + if (ret) + return ret; + + cgc.buflen = be16_to_cpu(ti->track_information_length) + + sizeof(ti->track_information_length); + + if (cgc.buflen > sizeof(track_information)) + cgc.buflen = sizeof(track_information); + + cgc.cmd[8] = cgc.buflen; + return pkt_generic_packet(pd, &cgc); +} + +static noinline_for_stack int pkt_get_last_written(struct pktcdvd_device *pd, + long *last_written) +{ + disc_information di; + track_information ti; + __u32 last_track; + int ret; + + ret = pkt_get_disc_info(pd, &di); + if (ret) + return ret; + + last_track = (di.last_track_msb << 8) | di.last_track_lsb; + ret = pkt_get_track_info(pd, last_track, 1, &ti); + if (ret) + return ret; + + /* if this track is blank, try the previous. */ + if (ti.blank) { + last_track--; + ret = pkt_get_track_info(pd, last_track, 1, &ti); + if (ret) + return ret; + } + + /* if last recorded field is valid, return it. 
*/ + if (ti.lra_v) { + *last_written = be32_to_cpu(ti.last_rec_address); + } else { + /* make it up instead */ + *last_written = be32_to_cpu(ti.track_start) + + be32_to_cpu(ti.track_size); + if (ti.free_blocks) + *last_written -= (be32_to_cpu(ti.free_blocks) + 7); + } + return 0; +} + +/* + * write mode select package based on pd->settings + */ +static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd) +{ + struct packet_command cgc; + struct scsi_sense_hdr sshdr; + write_param_page *wp; + char buffer[128]; + int ret, size; + + /* doesn't apply to DVD+RW or DVD-RAM */ + if ((pd->mmc3_profile == 0x1a) || (pd->mmc3_profile == 0x12)) + return 0; + + memset(buffer, 0, sizeof(buffer)); + init_cdrom_command(&cgc, buffer, sizeof(*wp), CGC_DATA_READ); + cgc.sshdr = &sshdr; + ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0); + if (ret) { + pkt_dump_sense(pd, &cgc); + return ret; + } + + size = 2 + ((buffer[0] << 8) | (buffer[1] & 0xff)); + pd->mode_offset = (buffer[6] << 8) | (buffer[7] & 0xff); + if (size > sizeof(buffer)) + size = sizeof(buffer); + + /* + * now get it all + */ + init_cdrom_command(&cgc, buffer, size, CGC_DATA_READ); + cgc.sshdr = &sshdr; + ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0); + if (ret) { + pkt_dump_sense(pd, &cgc); + return ret; + } + + /* + * write page is offset header + block descriptor length + */ + wp = (write_param_page *) &buffer[sizeof(struct mode_page_header) + pd->mode_offset]; + + wp->fp = pd->settings.fp; + wp->track_mode = pd->settings.track_mode; + wp->write_type = pd->settings.write_type; + wp->data_block_type = pd->settings.block_mode; + + wp->multi_session = 0; + +#ifdef PACKET_USE_LS + wp->link_size = 7; + wp->ls_v = 1; +#endif + + if (wp->data_block_type == PACKET_BLOCK_MODE1) { + wp->session_format = 0; + wp->subhdr2 = 0x20; + } else if (wp->data_block_type == PACKET_BLOCK_MODE2) { + wp->session_format = 0x20; + wp->subhdr2 = 8; +#if 0 + wp->mcn[0] = 0x80; + memcpy(&wp->mcn[1], 
PACKET_MCN, sizeof(wp->mcn) - 1); +#endif + } else { + /* + * paranoia + */ + pkt_err(pd, "write mode wrong %d\n", wp->data_block_type); + return 1; + } + wp->packet_size = cpu_to_be32(pd->settings.size >> 2); + + cgc.buflen = cgc.cmd[8] = size; + ret = pkt_mode_select(pd, &cgc); + if (ret) { + pkt_dump_sense(pd, &cgc); + return ret; + } + + pkt_print_settings(pd); + return 0; +} + +/* + * 1 -- we can write to this track, 0 -- we can't + */ +static int pkt_writable_track(struct pktcdvd_device *pd, track_information *ti) +{ + switch (pd->mmc3_profile) { + case 0x1a: /* DVD+RW */ + case 0x12: /* DVD-RAM */ + /* The track is always writable on DVD+RW/DVD-RAM */ + return 1; + default: + break; + } + + if (!ti->packet || !ti->fp) + return 0; + + /* + * "good" settings as per Mt Fuji. + */ + if (ti->rt == 0 && ti->blank == 0) + return 1; + + if (ti->rt == 0 && ti->blank == 1) + return 1; + + if (ti->rt == 1 && ti->blank == 0) + return 1; + + pkt_err(pd, "bad state %d-%d-%d\n", ti->rt, ti->blank, ti->packet); + return 0; +} + +/* + * 1 -- we can write to this disc, 0 -- we can't + */ +static int pkt_writable_disc(struct pktcdvd_device *pd, disc_information *di) +{ + switch (pd->mmc3_profile) { + case 0x0a: /* CD-RW */ + case 0xffff: /* MMC3 not supported */ + break; + case 0x1a: /* DVD+RW */ + case 0x13: /* DVD-RW */ + case 0x12: /* DVD-RAM */ + return 1; + default: + pkt_dbg(2, pd, "Wrong disc profile (%x)\n", + pd->mmc3_profile); + return 0; + } + + /* + * for disc type 0xff we should probably reserve a new track. + * but i'm not sure, should we leave this to user apps? probably. 
+ */ + if (di->disc_type == 0xff) { + pkt_notice(pd, "unknown disc - no track?\n"); + return 0; + } + + if (di->disc_type != 0x20 && di->disc_type != 0) { + pkt_err(pd, "wrong disc type (%x)\n", di->disc_type); + return 0; + } + + if (di->erasable == 0) { + pkt_notice(pd, "disc not erasable\n"); + return 0; + } + + if (di->border_status == PACKET_SESSION_RESERVED) { + pkt_err(pd, "can't write to last track (reserved)\n"); + return 0; + } + + return 1; +} + +static noinline_for_stack int pkt_probe_settings(struct pktcdvd_device *pd) +{ + struct packet_command cgc; + unsigned char buf[12]; + disc_information di; + track_information ti; + int ret, track; + + init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ); + cgc.cmd[0] = GPCMD_GET_CONFIGURATION; + cgc.cmd[8] = 8; + ret = pkt_generic_packet(pd, &cgc); + pd->mmc3_profile = ret ? 0xffff : buf[6] << 8 | buf[7]; + + memset(&di, 0, sizeof(disc_information)); + memset(&ti, 0, sizeof(track_information)); + + ret = pkt_get_disc_info(pd, &di); + if (ret) { + pkt_err(pd, "failed get_disc\n"); + return ret; + } + + if (!pkt_writable_disc(pd, &di)) + return -EROFS; + + pd->type = di.erasable ? PACKET_CDRW : PACKET_CDR; + + track = 1; /* (di.last_track_msb << 8) | di.last_track_lsb; */ + ret = pkt_get_track_info(pd, track, 1, &ti); + if (ret) { + pkt_err(pd, "failed get_track\n"); + return ret; + } + + if (!pkt_writable_track(pd, &ti)) { + pkt_err(pd, "can't write to this track\n"); + return -EROFS; + } + + /* + * we keep packet size in 512 byte units, makes it easier to + * deal with request calculations. 
+ */ + pd->settings.size = be32_to_cpu(ti.fixed_packet_size) << 2; + if (pd->settings.size == 0) { + pkt_notice(pd, "detected zero packet size!\n"); + return -ENXIO; + } + if (pd->settings.size > PACKET_MAX_SECTORS) { + pkt_err(pd, "packet size is too big\n"); + return -EROFS; + } + pd->settings.fp = ti.fp; + pd->offset = (be32_to_cpu(ti.track_start) << 2) & (pd->settings.size - 1); + + if (ti.nwa_v) { + pd->nwa = be32_to_cpu(ti.next_writable); + set_bit(PACKET_NWA_VALID, &pd->flags); + } + + /* + * in theory we could use lra on -RW media as well and just zero + * blocks that haven't been written yet, but in practice that + * is just a no-go. we'll use that for -R, naturally. + */ + if (ti.lra_v) { + pd->lra = be32_to_cpu(ti.last_rec_address); + set_bit(PACKET_LRA_VALID, &pd->flags); + } else { + pd->lra = 0xffffffff; + set_bit(PACKET_LRA_VALID, &pd->flags); + } + + /* + * fine for now + */ + pd->settings.link_loss = 7; + pd->settings.write_type = 0; /* packet */ + pd->settings.track_mode = ti.track_mode; + + /* + * mode1 or mode2 disc + */ + switch (ti.data_mode) { + case PACKET_MODE1: + pd->settings.block_mode = PACKET_BLOCK_MODE1; + break; + case PACKET_MODE2: + pd->settings.block_mode = PACKET_BLOCK_MODE2; + break; + default: + pkt_err(pd, "unknown data mode\n"); + return -EROFS; + } + return 0; +} + +/* + * enable/disable write caching on drive + */ +static noinline_for_stack int pkt_write_caching(struct pktcdvd_device *pd, + int set) +{ + struct packet_command cgc; + struct scsi_sense_hdr sshdr; + unsigned char buf[64]; + int ret; + + init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ); + cgc.sshdr = &sshdr; + cgc.buflen = pd->mode_offset + 12; + + /* + * caching mode page might not be there, so quiet this command + */ + cgc.quiet = 1; + + ret = pkt_mode_sense(pd, &cgc, GPMODE_WCACHING_PAGE, 0); + if (ret) + return ret; + + buf[pd->mode_offset + 10] |= (!!set << 2); + + cgc.buflen = cgc.cmd[8] = 2 + ((buf[0] << 8) | (buf[1] & 0xff)); + ret = 
pkt_mode_select(pd, &cgc); + if (ret) { + pkt_err(pd, "write caching control failed\n"); + pkt_dump_sense(pd, &cgc); + } else if (!ret && set) + pkt_notice(pd, "enabled write caching\n"); + return ret; +} + +static int pkt_lock_door(struct pktcdvd_device *pd, int lockflag) +{ + struct packet_command cgc; + + init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE); + cgc.cmd[0] = GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL; + cgc.cmd[4] = lockflag ? 1 : 0; + return pkt_generic_packet(pd, &cgc); +} + +/* + * Returns drive maximum write speed + */ +static noinline_for_stack int pkt_get_max_speed(struct pktcdvd_device *pd, + unsigned *write_speed) +{ + struct packet_command cgc; + struct scsi_sense_hdr sshdr; + unsigned char buf[256+18]; + unsigned char *cap_buf; + int ret, offset; + + cap_buf = &buf[sizeof(struct mode_page_header) + pd->mode_offset]; + init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_UNKNOWN); + cgc.sshdr = &sshdr; + + ret = pkt_mode_sense(pd, &cgc, GPMODE_CAPABILITIES_PAGE, 0); + if (ret) { + cgc.buflen = pd->mode_offset + cap_buf[1] + 2 + + sizeof(struct mode_page_header); + ret = pkt_mode_sense(pd, &cgc, GPMODE_CAPABILITIES_PAGE, 0); + if (ret) { + pkt_dump_sense(pd, &cgc); + return ret; + } + } + + offset = 20; /* Obsoleted field, used by older drives */ + if (cap_buf[1] >= 28) + offset = 28; /* Current write speed selected */ + if (cap_buf[1] >= 30) { + /* If the drive reports at least one "Logical Unit Write + * Speed Performance Descriptor Block", use the information + * in the first block. 
(contains the highest speed) + */ + int num_spdb = (cap_buf[30] << 8) + cap_buf[31]; + if (num_spdb > 0) + offset = 34; + } + + *write_speed = (cap_buf[offset] << 8) | cap_buf[offset + 1]; + return 0; +} + +/* These tables from cdrecord - I don't have orange book */ +/* standard speed CD-RW (1-4x) */ +static char clv_to_speed[16] = { + /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */ + 0, 2, 4, 6, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; +/* high speed CD-RW (-10x) */ +static char hs_clv_to_speed[16] = { + /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */ + 0, 2, 4, 6, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; +/* ultra high speed CD-RW */ +static char us_clv_to_speed[16] = { + /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */ + 0, 2, 4, 8, 0, 0,16, 0,24,32,40,48, 0, 0, 0, 0 +}; + +/* + * reads the maximum media speed from ATIP + */ +static noinline_for_stack int pkt_media_speed(struct pktcdvd_device *pd, + unsigned *speed) +{ + struct packet_command cgc; + struct scsi_sense_hdr sshdr; + unsigned char buf[64]; + unsigned int size, st, sp; + int ret; + + init_cdrom_command(&cgc, buf, 2, CGC_DATA_READ); + cgc.sshdr = &sshdr; + cgc.cmd[0] = GPCMD_READ_TOC_PMA_ATIP; + cgc.cmd[1] = 2; + cgc.cmd[2] = 4; /* READ ATIP */ + cgc.cmd[8] = 2; + ret = pkt_generic_packet(pd, &cgc); + if (ret) { + pkt_dump_sense(pd, &cgc); + return ret; + } + size = ((unsigned int) buf[0]<<8) + buf[1] + 2; + if (size > sizeof(buf)) + size = sizeof(buf); + + init_cdrom_command(&cgc, buf, size, CGC_DATA_READ); + cgc.sshdr = &sshdr; + cgc.cmd[0] = GPCMD_READ_TOC_PMA_ATIP; + cgc.cmd[1] = 2; + cgc.cmd[2] = 4; + cgc.cmd[8] = size; + ret = pkt_generic_packet(pd, &cgc); + if (ret) { + pkt_dump_sense(pd, &cgc); + return ret; + } + + if (!(buf[6] & 0x40)) { + pkt_notice(pd, "disc type is not CD-RW\n"); + return 1; + } + if (!(buf[6] & 0x4)) { + pkt_notice(pd, "A1 values on media are not valid, maybe not CDRW?\n"); + return 1; + } + + st = (buf[6] >> 3) & 0x7; /* disc sub-type */ + + sp = buf[16] & 0xf; /* max speed from ATIP 
A1 field */ + + /* Info from cdrecord */ + switch (st) { + case 0: /* standard speed */ + *speed = clv_to_speed[sp]; + break; + case 1: /* high speed */ + *speed = hs_clv_to_speed[sp]; + break; + case 2: /* ultra high speed */ + *speed = us_clv_to_speed[sp]; + break; + default: + pkt_notice(pd, "unknown disc sub-type %d\n", st); + return 1; + } + if (*speed) { + pkt_info(pd, "maximum media speed: %d\n", *speed); + return 0; + } else { + pkt_notice(pd, "unknown speed %d for sub-type %d\n", sp, st); + return 1; + } +} + +static noinline_for_stack int pkt_perform_opc(struct pktcdvd_device *pd) +{ + struct packet_command cgc; + struct scsi_sense_hdr sshdr; + int ret; + + pkt_dbg(2, pd, "Performing OPC\n"); + + init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE); + cgc.sshdr = &sshdr; + cgc.timeout = 60*HZ; + cgc.cmd[0] = GPCMD_SEND_OPC; + cgc.cmd[1] = 1; + ret = pkt_generic_packet(pd, &cgc); + if (ret) + pkt_dump_sense(pd, &cgc); + return ret; +} + +static int pkt_open_write(struct pktcdvd_device *pd) +{ + int ret; + unsigned int write_speed, media_write_speed, read_speed; + + ret = pkt_probe_settings(pd); + if (ret) { + pkt_dbg(2, pd, "failed probe\n"); + return ret; + } + + ret = pkt_set_write_settings(pd); + if (ret) { + pkt_dbg(1, pd, "failed saving write settings\n"); + return -EIO; + } + + pkt_write_caching(pd, USE_WCACHING); + + ret = pkt_get_max_speed(pd, &write_speed); + if (ret) + write_speed = 16 * 177; + switch (pd->mmc3_profile) { + case 0x13: /* DVD-RW */ + case 0x1a: /* DVD+RW */ + case 0x12: /* DVD-RAM */ + pkt_dbg(1, pd, "write speed %ukB/s\n", write_speed); + break; + default: + ret = pkt_media_speed(pd, &media_write_speed); + if (ret) + media_write_speed = 16; + write_speed = min(write_speed, media_write_speed * 177); + pkt_dbg(1, pd, "write speed %ux\n", write_speed / 176); + break; + } + read_speed = write_speed; + + ret = pkt_set_speed(pd, write_speed, read_speed); + if (ret) { + pkt_dbg(1, pd, "couldn't set write speed\n"); + return -EIO; + } + 
pd->write_speed = write_speed; + pd->read_speed = read_speed; + + ret = pkt_perform_opc(pd); + if (ret) { + pkt_dbg(1, pd, "Optimum Power Calibration failed\n"); + } + + return 0; +} + +/* + * called at open time. + */ +static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) +{ + int ret; + long lba; + struct request_queue *q; + struct block_device *bdev; + + /* + * We need to re-open the cdrom device without O_NONBLOCK to be able + * to read/write from/to it. It is already opened in O_NONBLOCK mode + * so open should not fail. + */ + bdev = blkdev_get_by_dev(pd->bdev->bd_dev, FMODE_READ | FMODE_EXCL, pd); + if (IS_ERR(bdev)) { + ret = PTR_ERR(bdev); + goto out; + } + + ret = pkt_get_last_written(pd, &lba); + if (ret) { + pkt_err(pd, "pkt_get_last_written failed\n"); + goto out_putdev; + } + + set_capacity(pd->disk, lba << 2); + set_capacity_and_notify(pd->bdev->bd_disk, lba << 2); + + q = bdev_get_queue(pd->bdev); + if (write) { + ret = pkt_open_write(pd); + if (ret) + goto out_putdev; + /* + * Some CDRW drives can not handle writes larger than one packet, + * even if the size is a multiple of the packet size. + */ + blk_queue_max_hw_sectors(q, pd->settings.size); + set_bit(PACKET_WRITABLE, &pd->flags); + } else { + pkt_set_speed(pd, MAX_SPEED, MAX_SPEED); + clear_bit(PACKET_WRITABLE, &pd->flags); + } + + ret = pkt_set_segment_merging(pd, q); + if (ret) + goto out_putdev; + + if (write) { + if (!pkt_grow_pktlist(pd, CONFIG_CDROM_PKTCDVD_BUFFERS)) { + pkt_err(pd, "not enough memory for buffers\n"); + ret = -ENOMEM; + goto out_putdev; + } + pkt_info(pd, "%lukB available on disc\n", lba << 1); + } + + return 0; + +out_putdev: + blkdev_put(bdev, FMODE_READ | FMODE_EXCL); +out: + return ret; +} + +/* + * called when the device is closed. makes sure that the device flushes + * the internal cache before we close. 
+ */ +static void pkt_release_dev(struct pktcdvd_device *pd, int flush) +{ + if (flush && pkt_flush_cache(pd)) + pkt_dbg(1, pd, "not flushing cache\n"); + + pkt_lock_door(pd, 0); + + pkt_set_speed(pd, MAX_SPEED, MAX_SPEED); + blkdev_put(pd->bdev, FMODE_READ | FMODE_EXCL); + + pkt_shrink_pktlist(pd); +} + +static struct pktcdvd_device *pkt_find_dev_from_minor(unsigned int dev_minor) +{ + if (dev_minor >= MAX_WRITERS) + return NULL; + + dev_minor = array_index_nospec(dev_minor, MAX_WRITERS); + return pkt_devs[dev_minor]; +} + +static int pkt_open(struct block_device *bdev, fmode_t mode) +{ + struct pktcdvd_device *pd = NULL; + int ret; + + mutex_lock(&pktcdvd_mutex); + mutex_lock(&ctl_mutex); + pd = pkt_find_dev_from_minor(MINOR(bdev->bd_dev)); + if (!pd) { + ret = -ENODEV; + goto out; + } + BUG_ON(pd->refcnt < 0); + + pd->refcnt++; + if (pd->refcnt > 1) { + if ((mode & FMODE_WRITE) && + !test_bit(PACKET_WRITABLE, &pd->flags)) { + ret = -EBUSY; + goto out_dec; + } + } else { + ret = pkt_open_dev(pd, mode & FMODE_WRITE); + if (ret) + goto out_dec; + /* + * needed here as well, since ext2 (among others) may change + * the blocksize at mount time + */ + set_blocksize(bdev, CD_FRAMESIZE); + } + + mutex_unlock(&ctl_mutex); + mutex_unlock(&pktcdvd_mutex); + return 0; + +out_dec: + pd->refcnt--; +out: + mutex_unlock(&ctl_mutex); + mutex_unlock(&pktcdvd_mutex); + return ret; +} + +static void pkt_close(struct gendisk *disk, fmode_t mode) +{ + struct pktcdvd_device *pd = disk->private_data; + + mutex_lock(&pktcdvd_mutex); + mutex_lock(&ctl_mutex); + pd->refcnt--; + BUG_ON(pd->refcnt < 0); + if (pd->refcnt == 0) { + int flush = test_bit(PACKET_WRITABLE, &pd->flags); + pkt_release_dev(pd, flush); + } + mutex_unlock(&ctl_mutex); + mutex_unlock(&pktcdvd_mutex); +} + + +static void pkt_end_io_read_cloned(struct bio *bio) +{ + struct packet_stacked_data *psd = bio->bi_private; + struct pktcdvd_device *pd = psd->pd; + + psd->bio->bi_status = bio->bi_status; + bio_put(bio); + 
bio_endio(psd->bio); + mempool_free(psd, &psd_pool); + pkt_bio_finished(pd); +} + +static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio) +{ + struct bio *cloned_bio = + bio_alloc_clone(pd->bdev, bio, GFP_NOIO, &pkt_bio_set); + struct packet_stacked_data *psd = mempool_alloc(&psd_pool, GFP_NOIO); + + psd->pd = pd; + psd->bio = bio; + cloned_bio->bi_private = psd; + cloned_bio->bi_end_io = pkt_end_io_read_cloned; + pd->stats.secs_r += bio_sectors(bio); + pkt_queue_bio(pd, cloned_bio); +} + +static void pkt_make_request_write(struct request_queue *q, struct bio *bio) +{ + struct pktcdvd_device *pd = q->queuedata; + sector_t zone; + struct packet_data *pkt; + int was_empty, blocked_bio; + struct pkt_rb_node *node; + + zone = get_zone(bio->bi_iter.bi_sector, pd); + + /* + * If we find a matching packet in state WAITING or READ_WAIT, we can + * just append this bio to that packet. + */ + spin_lock(&pd->cdrw.active_list_lock); + blocked_bio = 0; + list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) { + if (pkt->sector == zone) { + spin_lock(&pkt->lock); + if ((pkt->state == PACKET_WAITING_STATE) || + (pkt->state == PACKET_READ_WAIT_STATE)) { + bio_list_add(&pkt->orig_bios, bio); + pkt->write_size += + bio->bi_iter.bi_size / CD_FRAMESIZE; + if ((pkt->write_size >= pkt->frames) && + (pkt->state == PACKET_WAITING_STATE)) { + atomic_inc(&pkt->run_sm); + wake_up(&pd->wqueue); + } + spin_unlock(&pkt->lock); + spin_unlock(&pd->cdrw.active_list_lock); + return; + } else { + blocked_bio = 1; + } + spin_unlock(&pkt->lock); + } + } + spin_unlock(&pd->cdrw.active_list_lock); + + /* + * Test if there is enough room left in the bio work queue + * (queue size >= congestion on mark). + * If not, wait till the work queue size is below the congestion off mark. 
+ */ + spin_lock(&pd->lock); + if (pd->write_congestion_on > 0 + && pd->bio_queue_size >= pd->write_congestion_on) { + struct wait_bit_queue_entry wqe; + + init_wait_var_entry(&wqe, &pd->congested, 0); + for (;;) { + prepare_to_wait_event(__var_waitqueue(&pd->congested), + &wqe.wq_entry, + TASK_UNINTERRUPTIBLE); + if (pd->bio_queue_size <= pd->write_congestion_off) + break; + pd->congested = true; + spin_unlock(&pd->lock); + schedule(); + spin_lock(&pd->lock); + } + } + spin_unlock(&pd->lock); + + /* + * No matching packet found. Store the bio in the work queue. + */ + node = mempool_alloc(&pd->rb_pool, GFP_NOIO); + node->bio = bio; + spin_lock(&pd->lock); + BUG_ON(pd->bio_queue_size < 0); + was_empty = (pd->bio_queue_size == 0); + pkt_rbtree_insert(pd, node); + spin_unlock(&pd->lock); + + /* + * Wake up the worker thread. + */ + atomic_set(&pd->scan_queue, 1); + if (was_empty) { + /* This wake_up is required for correct operation */ + wake_up(&pd->wqueue); + } else if (!list_empty(&pd->cdrw.pkt_free_list) && !blocked_bio) { + /* + * This wake up is not required for correct operation, + * but improves performance in some cases. + */ + wake_up(&pd->wqueue); + } +} + +static void pkt_submit_bio(struct bio *bio) +{ + struct pktcdvd_device *pd = bio->bi_bdev->bd_disk->queue->queuedata; + struct bio *split; + + bio = bio_split_to_limits(bio); + + pkt_dbg(2, pd, "start = %6llx stop = %6llx\n", + (unsigned long long)bio->bi_iter.bi_sector, + (unsigned long long)bio_end_sector(bio)); + + /* + * Clone READ bios so we can have our own bi_end_io callback. 
+ */ + if (bio_data_dir(bio) == READ) { + pkt_make_request_read(pd, bio); + return; + } + + if (!test_bit(PACKET_WRITABLE, &pd->flags)) { + pkt_notice(pd, "WRITE for ro device (%llu)\n", + (unsigned long long)bio->bi_iter.bi_sector); + goto end_io; + } + + if (!bio->bi_iter.bi_size || (bio->bi_iter.bi_size % CD_FRAMESIZE)) { + pkt_err(pd, "wrong bio size\n"); + goto end_io; + } + + do { + sector_t zone = get_zone(bio->bi_iter.bi_sector, pd); + sector_t last_zone = get_zone(bio_end_sector(bio) - 1, pd); + + if (last_zone != zone) { + BUG_ON(last_zone != zone + pd->settings.size); + + split = bio_split(bio, last_zone - + bio->bi_iter.bi_sector, + GFP_NOIO, &pkt_bio_set); + bio_chain(split, bio); + } else { + split = bio; + } + + pkt_make_request_write(bio->bi_bdev->bd_disk->queue, split); + } while (split != bio); + + return; +end_io: + bio_io_error(bio); +} + +static void pkt_init_queue(struct pktcdvd_device *pd) +{ + struct request_queue *q = pd->disk->queue; + + blk_queue_logical_block_size(q, CD_FRAMESIZE); + blk_queue_max_hw_sectors(q, PACKET_MAX_SECTORS); + q->queuedata = pd; +} + +static int pkt_seq_show(struct seq_file *m, void *p) +{ + struct pktcdvd_device *pd = m->private; + char *msg; + int states[PACKET_NUM_STATES]; + + seq_printf(m, "Writer %s mapped to %pg:\n", pd->name, pd->bdev); + + seq_printf(m, "\nSettings:\n"); + seq_printf(m, "\tpacket size:\t\t%dkB\n", pd->settings.size / 2); + + if (pd->settings.write_type == 0) + msg = "Packet"; + else + msg = "Unknown"; + seq_printf(m, "\twrite type:\t\t%s\n", msg); + + seq_printf(m, "\tpacket type:\t\t%s\n", pd->settings.fp ? 
"Fixed" : "Variable"); + seq_printf(m, "\tlink loss:\t\t%d\n", pd->settings.link_loss); + + seq_printf(m, "\ttrack mode:\t\t%d\n", pd->settings.track_mode); + + if (pd->settings.block_mode == PACKET_BLOCK_MODE1) + msg = "Mode 1"; + else if (pd->settings.block_mode == PACKET_BLOCK_MODE2) + msg = "Mode 2"; + else + msg = "Unknown"; + seq_printf(m, "\tblock mode:\t\t%s\n", msg); + + seq_printf(m, "\nStatistics:\n"); + seq_printf(m, "\tpackets started:\t%lu\n", pd->stats.pkt_started); + seq_printf(m, "\tpackets ended:\t\t%lu\n", pd->stats.pkt_ended); + seq_printf(m, "\twritten:\t\t%lukB\n", pd->stats.secs_w >> 1); + seq_printf(m, "\tread gather:\t\t%lukB\n", pd->stats.secs_rg >> 1); + seq_printf(m, "\tread:\t\t\t%lukB\n", pd->stats.secs_r >> 1); + + seq_printf(m, "\nMisc:\n"); + seq_printf(m, "\treference count:\t%d\n", pd->refcnt); + seq_printf(m, "\tflags:\t\t\t0x%lx\n", pd->flags); + seq_printf(m, "\tread speed:\t\t%ukB/s\n", pd->read_speed); + seq_printf(m, "\twrite speed:\t\t%ukB/s\n", pd->write_speed); + seq_printf(m, "\tstart offset:\t\t%lu\n", pd->offset); + seq_printf(m, "\tmode page offset:\t%u\n", pd->mode_offset); + + seq_printf(m, "\nQueue state:\n"); + seq_printf(m, "\tbios queued:\t\t%d\n", pd->bio_queue_size); + seq_printf(m, "\tbios pending:\t\t%d\n", atomic_read(&pd->cdrw.pending_bios)); + seq_printf(m, "\tcurrent sector:\t\t0x%llx\n", (unsigned long long)pd->current_sector); + + pkt_count_states(pd, states); + seq_printf(m, "\tstate:\t\t\ti:%d ow:%d rw:%d ww:%d rec:%d fin:%d\n", + states[0], states[1], states[2], states[3], states[4], states[5]); + + seq_printf(m, "\twrite congestion marks:\toff=%d on=%d\n", + pd->write_congestion_off, + pd->write_congestion_on); + return 0; +} + +static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) +{ + int i; + struct block_device *bdev; + struct scsi_device *sdev; + + if (pd->pkt_dev == dev) { + pkt_err(pd, "recursive setup not allowed\n"); + return -EBUSY; + } + for (i = 0; i < MAX_WRITERS; i++) { + 
struct pktcdvd_device *pd2 = pkt_devs[i]; + if (!pd2) + continue; + if (pd2->bdev->bd_dev == dev) { + pkt_err(pd, "%pg already setup\n", pd2->bdev); + return -EBUSY; + } + if (pd2->pkt_dev == dev) { + pkt_err(pd, "can't chain pktcdvd devices\n"); + return -EBUSY; + } + } + + bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_NDELAY, NULL); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); + sdev = scsi_device_from_queue(bdev->bd_disk->queue); + if (!sdev) { + blkdev_put(bdev, FMODE_READ | FMODE_NDELAY); + return -EINVAL; + } + put_device(&sdev->sdev_gendev); + + /* This is safe, since we have a reference from open(). */ + __module_get(THIS_MODULE); + + pd->bdev = bdev; + set_blocksize(bdev, CD_FRAMESIZE); + + pkt_init_queue(pd); + + atomic_set(&pd->cdrw.pending_bios, 0); + pd->cdrw.thread = kthread_run(kcdrwd, pd, "%s", pd->name); + if (IS_ERR(pd->cdrw.thread)) { + pkt_err(pd, "can't start kernel thread\n"); + goto out_mem; + } + + proc_create_single_data(pd->name, 0, pkt_proc, pkt_seq_show, pd); + pkt_dbg(1, pd, "writer mapped to %pg\n", bdev); + return 0; + +out_mem: + blkdev_put(bdev, FMODE_READ | FMODE_NDELAY); + /* This is safe: open() is still holding a reference. */ + module_put(THIS_MODULE); + return -ENOMEM; +} + +static int pkt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) +{ + struct pktcdvd_device *pd = bdev->bd_disk->private_data; + int ret; + + pkt_dbg(2, pd, "cmd %x, dev %d:%d\n", + cmd, MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev)); + + mutex_lock(&pktcdvd_mutex); + switch (cmd) { + case CDROMEJECT: + /* + * The door gets locked when the device is opened, so we + * have to unlock it or else the eject command fails. 
+ */ + if (pd->refcnt == 1) + pkt_lock_door(pd, 0); + fallthrough; + /* + * forward selected CDROM ioctls to CD-ROM, for UDF + */ + case CDROMMULTISESSION: + case CDROMREADTOCENTRY: + case CDROM_LAST_WRITTEN: + case CDROM_SEND_PACKET: + case SCSI_IOCTL_SEND_COMMAND: + if (!bdev->bd_disk->fops->ioctl) + ret = -ENOTTY; + else + ret = bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg); + break; + default: + pkt_dbg(2, pd, "Unknown ioctl (%x)\n", cmd); + ret = -ENOTTY; + } + mutex_unlock(&pktcdvd_mutex); + + return ret; +} + +static unsigned int pkt_check_events(struct gendisk *disk, + unsigned int clearing) +{ + struct pktcdvd_device *pd = disk->private_data; + struct gendisk *attached_disk; + + if (!pd) + return 0; + if (!pd->bdev) + return 0; + attached_disk = pd->bdev->bd_disk; + if (!attached_disk || !attached_disk->fops->check_events) + return 0; + return attached_disk->fops->check_events(attached_disk, clearing); +} + +static char *pkt_devnode(struct gendisk *disk, umode_t *mode) +{ + return kasprintf(GFP_KERNEL, "pktcdvd/%s", disk->disk_name); +} + +static const struct block_device_operations pktcdvd_ops = { + .owner = THIS_MODULE, + .submit_bio = pkt_submit_bio, + .open = pkt_open, + .release = pkt_close, + .ioctl = pkt_ioctl, + .compat_ioctl = blkdev_compat_ptr_ioctl, + .check_events = pkt_check_events, + .devnode = pkt_devnode, +}; + +/* + * Set up mapping from pktcdvd device to CD-ROM device. 
+ */ +static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) +{ + int idx; + int ret = -ENOMEM; + struct pktcdvd_device *pd; + struct gendisk *disk; + + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + + for (idx = 0; idx < MAX_WRITERS; idx++) + if (!pkt_devs[idx]) + break; + if (idx == MAX_WRITERS) { + pr_err("max %d writers supported\n", MAX_WRITERS); + ret = -EBUSY; + goto out_mutex; + } + + pd = kzalloc(sizeof(struct pktcdvd_device), GFP_KERNEL); + if (!pd) + goto out_mutex; + + ret = mempool_init_kmalloc_pool(&pd->rb_pool, PKT_RB_POOL_SIZE, + sizeof(struct pkt_rb_node)); + if (ret) + goto out_mem; + + INIT_LIST_HEAD(&pd->cdrw.pkt_free_list); + INIT_LIST_HEAD(&pd->cdrw.pkt_active_list); + spin_lock_init(&pd->cdrw.active_list_lock); + + spin_lock_init(&pd->lock); + spin_lock_init(&pd->iosched.lock); + bio_list_init(&pd->iosched.read_queue); + bio_list_init(&pd->iosched.write_queue); + sprintf(pd->name, DRIVER_NAME"%d", idx); + init_waitqueue_head(&pd->wqueue); + pd->bio_queue = RB_ROOT; + + pd->write_congestion_on = write_congestion_on; + pd->write_congestion_off = write_congestion_off; + + ret = -ENOMEM; + disk = blk_alloc_disk(NUMA_NO_NODE); + if (!disk) + goto out_mem; + pd->disk = disk; + disk->major = pktdev_major; + disk->first_minor = idx; + disk->minors = 1; + disk->fops = &pktcdvd_ops; + disk->flags = GENHD_FL_REMOVABLE | GENHD_FL_NO_PART; + strcpy(disk->disk_name, pd->name); + disk->private_data = pd; + + pd->pkt_dev = MKDEV(pktdev_major, idx); + ret = pkt_new_dev(pd, dev); + if (ret) + goto out_mem2; + + /* inherit events of the host device */ + disk->events = pd->bdev->bd_disk->events; + + ret = add_disk(disk); + if (ret) + goto out_mem2; + + pkt_sysfs_dev_new(pd); + pkt_debugfs_dev_new(pd); + + pkt_devs[idx] = pd; + if (pkt_dev) + *pkt_dev = pd->pkt_dev; + + mutex_unlock(&ctl_mutex); + return 0; + +out_mem2: + put_disk(disk); +out_mem: + mempool_exit(&pd->rb_pool); + kfree(pd); +out_mutex: + mutex_unlock(&ctl_mutex); + pr_err("setup of pktcdvd 
device failed\n"); + return ret; +} + +/* + * Tear down mapping from pktcdvd device to CD-ROM device. + */ +static int pkt_remove_dev(dev_t pkt_dev) +{ + struct pktcdvd_device *pd; + int idx; + int ret = 0; + + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + + for (idx = 0; idx < MAX_WRITERS; idx++) { + pd = pkt_devs[idx]; + if (pd && (pd->pkt_dev == pkt_dev)) + break; + } + if (idx == MAX_WRITERS) { + pr_debug("dev not setup\n"); + ret = -ENXIO; + goto out; + } + + if (pd->refcnt > 0) { + ret = -EBUSY; + goto out; + } + if (!IS_ERR(pd->cdrw.thread)) + kthread_stop(pd->cdrw.thread); + + pkt_devs[idx] = NULL; + + pkt_debugfs_dev_remove(pd); + pkt_sysfs_dev_remove(pd); + + blkdev_put(pd->bdev, FMODE_READ | FMODE_NDELAY); + + remove_proc_entry(pd->name, pkt_proc); + pkt_dbg(1, pd, "writer unmapped\n"); + + del_gendisk(pd->disk); + put_disk(pd->disk); + + mempool_exit(&pd->rb_pool); + kfree(pd); + + /* This is safe: open() is still holding a reference. */ + module_put(THIS_MODULE); + +out: + mutex_unlock(&ctl_mutex); + return ret; +} + +static void pkt_get_status(struct pkt_ctrl_command *ctrl_cmd) +{ + struct pktcdvd_device *pd; + + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + + pd = pkt_find_dev_from_minor(ctrl_cmd->dev_index); + if (pd) { + ctrl_cmd->dev = new_encode_dev(pd->bdev->bd_dev); + ctrl_cmd->pkt_dev = new_encode_dev(pd->pkt_dev); + } else { + ctrl_cmd->dev = 0; + ctrl_cmd->pkt_dev = 0; + } + ctrl_cmd->num_devices = MAX_WRITERS; + + mutex_unlock(&ctl_mutex); +} + +static long pkt_ctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + void __user *argp = (void __user *)arg; + struct pkt_ctrl_command ctrl_cmd; + int ret = 0; + dev_t pkt_dev = 0; + + if (cmd != PACKET_CTRL_CMD) + return -ENOTTY; + + if (copy_from_user(&ctrl_cmd, argp, sizeof(struct pkt_ctrl_command))) + return -EFAULT; + + switch (ctrl_cmd.command) { + case PKT_CTRL_CMD_SETUP: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + ret = 
pkt_setup_dev(new_decode_dev(ctrl_cmd.dev), &pkt_dev); + ctrl_cmd.pkt_dev = new_encode_dev(pkt_dev); + break; + case PKT_CTRL_CMD_TEARDOWN: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + ret = pkt_remove_dev(new_decode_dev(ctrl_cmd.pkt_dev)); + break; + case PKT_CTRL_CMD_STATUS: + pkt_get_status(&ctrl_cmd); + break; + default: + return -ENOTTY; + } + + if (copy_to_user(argp, &ctrl_cmd, sizeof(struct pkt_ctrl_command))) + return -EFAULT; + return ret; +} + +#ifdef CONFIG_COMPAT +static long pkt_ctl_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + return pkt_ctl_ioctl(file, cmd, (unsigned long)compat_ptr(arg)); +} +#endif + +static const struct file_operations pkt_ctl_fops = { + .open = nonseekable_open, + .unlocked_ioctl = pkt_ctl_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = pkt_ctl_compat_ioctl, +#endif + .owner = THIS_MODULE, + .llseek = no_llseek, +}; + +static struct miscdevice pkt_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = DRIVER_NAME, + .nodename = "pktcdvd/control", + .fops = &pkt_ctl_fops +}; + +static int __init pkt_init(void) +{ + int ret; + + mutex_init(&ctl_mutex); + + ret = mempool_init_kmalloc_pool(&psd_pool, PSD_POOL_SIZE, + sizeof(struct packet_stacked_data)); + if (ret) + return ret; + ret = bioset_init(&pkt_bio_set, BIO_POOL_SIZE, 0, 0); + if (ret) { + mempool_exit(&psd_pool); + return ret; + } + + ret = register_blkdev(pktdev_major, DRIVER_NAME); + if (ret < 0) { + pr_err("unable to register block device\n"); + goto out2; + } + if (!pktdev_major) + pktdev_major = ret; + + ret = pkt_sysfs_init(); + if (ret) + goto out; + + pkt_debugfs_init(); + + ret = misc_register(&pkt_misc); + if (ret) { + pr_err("unable to register misc device\n"); + goto out_misc; + } + + pkt_proc = proc_mkdir("driver/"DRIVER_NAME, NULL); + + return 0; + +out_misc: + pkt_debugfs_cleanup(); + pkt_sysfs_cleanup(); +out: + unregister_blkdev(pktdev_major, DRIVER_NAME); +out2: + mempool_exit(&psd_pool); + bioset_exit(&pkt_bio_set); + return ret; 
+} + +static void __exit pkt_exit(void) +{ + remove_proc_entry("driver/"DRIVER_NAME, NULL); + misc_deregister(&pkt_misc); + + pkt_debugfs_cleanup(); + pkt_sysfs_cleanup(); + + unregister_blkdev(pktdev_major, DRIVER_NAME); + mempool_exit(&psd_pool); + bioset_exit(&pkt_bio_set); +} + +MODULE_DESCRIPTION("Packet writing layer for CD/DVD drives"); +MODULE_AUTHOR("Jens Axboe <axboe@suse.de>"); +MODULE_LICENSE("GPL"); + +module_init(pkt_init); +module_exit(pkt_exit); diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index c76e0148eada..574e470b220b 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -587,6 +587,8 @@ static void ps3vram_submit_bio(struct bio *bio) dev_dbg(&dev->core, "%s\n", __func__); bio = bio_split_to_limits(bio); + if (!bio) + return; spin_lock_irq(&priv->lock); busy = !bio_list_empty(&priv->list); diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index e9de9d846b73..17b677b5d3b2 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1992,6 +1992,9 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; int ret = -EINVAL; + if (issue_flags & IO_URING_F_NONBLOCK) + return -EAGAIN; + ublk_ctrl_cmd_dump(cmd); if (!(issue_flags & IO_URING_F_SQE128)) diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index c0227dfa4688..4807af1d5805 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -524,7 +524,7 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, return 0; } -static int xen_blkbk_remove(struct xenbus_device *dev) +static void xen_blkbk_remove(struct xenbus_device *dev) { struct backend_info *be = dev_get_drvdata(&dev->dev); @@ -547,8 +547,6 @@ static int xen_blkbk_remove(struct xenbus_device *dev) /* Put the reference we set in xen_blkif_alloc(). 
*/ xen_blkif_put(be->blkif); } - - return 0; } int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index b28489290323..23ed258b57f0 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2467,7 +2467,7 @@ static void blkback_changed(struct xenbus_device *dev, } } -static int blkfront_remove(struct xenbus_device *xbdev) +static void blkfront_remove(struct xenbus_device *xbdev) { struct blkfront_info *info = dev_get_drvdata(&xbdev->dev); @@ -2488,7 +2488,6 @@ static int blkfront_remove(struct xenbus_device *xbdev) } kfree(info); - return 0; } static int blkfront_is_ready(struct xenbus_device *dev) diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index d69905233aff..7e513b771832 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -412,7 +412,9 @@ int tpm_pm_suspend(struct device *dev) } suspended: - return rc; + if (rc) + dev_err(dev, "Ignoring error %d while suspending\n", rc); + return 0; } EXPORT_SYMBOL_GPL(tpm_pm_suspend); diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c index 379291826261..80cca3b83b22 100644 --- a/drivers/char/tpm/xen-tpmfront.c +++ b/drivers/char/tpm/xen-tpmfront.c @@ -360,14 +360,13 @@ static int tpmfront_probe(struct xenbus_device *dev, return tpm_chip_register(priv->chip); } -static int tpmfront_remove(struct xenbus_device *dev) +static void tpmfront_remove(struct xenbus_device *dev) { struct tpm_chip *chip = dev_get_drvdata(&dev->dev); struct tpm_private *priv = dev_get_drvdata(&chip->dev); tpm_chip_unregister(chip); ring_free(priv); dev_set_drvdata(&chip->dev, NULL); - return 0; } static int tpmfront_resume(struct xenbus_device *dev) diff --git a/drivers/crypto/atmel-ecc.c b/drivers/crypto/atmel-ecc.c index 53100fb9b07b..12205e2b53b4 100644 --- a/drivers/crypto/atmel-ecc.c +++ b/drivers/crypto/atmel-ecc.c @@ -3,7 +3,7 @@ * Microchip / 
Atmel ECC (I2C) driver. * * Copyright (c) 2017, Microchip Technology Inc. - * Author: Tudor Ambarus <tudor.ambarus@microchip.com> + * Author: Tudor Ambarus */ #include <linux/delay.h> @@ -411,6 +411,6 @@ static void __exit atmel_ecc_exit(void) module_init(atmel_ecc_init); module_exit(atmel_ecc_exit); -MODULE_AUTHOR("Tudor Ambarus <tudor.ambarus@microchip.com>"); +MODULE_AUTHOR("Tudor Ambarus"); MODULE_DESCRIPTION("Microchip / Atmel ECC (I2C) driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/crypto/atmel-i2c.c b/drivers/crypto/atmel-i2c.c index 81ce09bedda8..55bff1e13142 100644 --- a/drivers/crypto/atmel-i2c.c +++ b/drivers/crypto/atmel-i2c.c @@ -3,7 +3,7 @@ * Microchip / Atmel ECC (I2C) driver. * * Copyright (c) 2017, Microchip Technology Inc. - * Author: Tudor Ambarus <tudor.ambarus@microchip.com> + * Author: Tudor Ambarus */ #include <linux/bitrev.h> @@ -390,6 +390,6 @@ static void __exit atmel_i2c_exit(void) module_init(atmel_i2c_init); module_exit(atmel_i2c_exit); -MODULE_AUTHOR("Tudor Ambarus <tudor.ambarus@microchip.com>"); +MODULE_AUTHOR("Tudor Ambarus"); MODULE_DESCRIPTION("Microchip / Atmel ECC (I2C) driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/crypto/atmel-i2c.h b/drivers/crypto/atmel-i2c.h index 48929efe2a5b..35f7857a7f7c 100644 --- a/drivers/crypto/atmel-i2c.h +++ b/drivers/crypto/atmel-i2c.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2017, Microchip Technology Inc. 
- * Author: Tudor Ambarus <tudor.ambarus@microchip.com> + * Author: Tudor Ambarus */ #ifndef __ATMEL_I2C_H__ diff --git a/drivers/crypto/caam/blob_gen.c b/drivers/crypto/caam/blob_gen.c index 1f65df489847..f46b161d2cda 100644 --- a/drivers/crypto/caam/blob_gen.c +++ b/drivers/crypto/caam/blob_gen.c @@ -104,7 +104,7 @@ int caam_process_blob(struct caam_blob_priv *priv, } ctrlpriv = dev_get_drvdata(jrdev->parent); - moo = FIELD_GET(CSTA_MOO, ioread32(&ctrlpriv->ctrl->perfmon.status)); + moo = FIELD_GET(CSTA_MOO, rd_reg32(&ctrlpriv->ctrl->perfmon.status)); if (moo != CSTA_MOO_SECURE && moo != CSTA_MOO_TRUSTED) dev_warn(jrdev, "using insecure test key, enable HAB to use unique device key!\n"); diff --git a/drivers/dma-buf/dma-buf-sysfs-stats.c b/drivers/dma-buf/dma-buf-sysfs-stats.c index f69d68122b9b..fbf725fae7c1 100644 --- a/drivers/dma-buf/dma-buf-sysfs-stats.c +++ b/drivers/dma-buf/dma-buf-sysfs-stats.c @@ -168,14 +168,11 @@ void dma_buf_uninit_sysfs_statistics(void) kset_unregister(dma_buf_stats_kset); } -int dma_buf_stats_setup(struct dma_buf *dmabuf) +int dma_buf_stats_setup(struct dma_buf *dmabuf, struct file *file) { struct dma_buf_sysfs_entry *sysfs_entry; int ret; - if (!dmabuf || !dmabuf->file) - return -EINVAL; - if (!dmabuf->exp_name) { pr_err("exporter name must not be empty if stats needed\n"); return -EINVAL; @@ -192,7 +189,7 @@ int dma_buf_stats_setup(struct dma_buf *dmabuf) /* create the directory for buffer stats */ ret = kobject_init_and_add(&sysfs_entry->kobj, &dma_buf_ktype, NULL, - "%lu", file_inode(dmabuf->file)->i_ino); + "%lu", file_inode(file)->i_ino); if (ret) goto err_sysfs_dmabuf; diff --git a/drivers/dma-buf/dma-buf-sysfs-stats.h b/drivers/dma-buf/dma-buf-sysfs-stats.h index a49c6e2650cc..7a8a995b75ba 100644 --- a/drivers/dma-buf/dma-buf-sysfs-stats.h +++ b/drivers/dma-buf/dma-buf-sysfs-stats.h @@ -13,7 +13,7 @@ int dma_buf_init_sysfs_statistics(void); void dma_buf_uninit_sysfs_statistics(void); -int dma_buf_stats_setup(struct dma_buf 
*dmabuf); +int dma_buf_stats_setup(struct dma_buf *dmabuf, struct file *file); void dma_buf_stats_teardown(struct dma_buf *dmabuf); #else @@ -25,7 +25,7 @@ static inline int dma_buf_init_sysfs_statistics(void) static inline void dma_buf_uninit_sysfs_statistics(void) {} -static inline int dma_buf_stats_setup(struct dma_buf *dmabuf) +static inline int dma_buf_stats_setup(struct dma_buf *dmabuf, struct file *file) { return 0; } diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index b6c36914e7c6..e6528767efc7 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -95,10 +95,11 @@ static int dma_buf_file_release(struct inode *inode, struct file *file) return -EINVAL; dmabuf = file->private_data; - - mutex_lock(&db_list.lock); - list_del(&dmabuf->list_node); - mutex_unlock(&db_list.lock); + if (dmabuf) { + mutex_lock(&db_list.lock); + list_del(&dmabuf->list_node); + mutex_unlock(&db_list.lock); + } return 0; } @@ -528,17 +529,17 @@ static inline int is_dma_buf_file(struct file *file) return file->f_op == &dma_buf_fops; } -static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags) +static struct file *dma_buf_getfile(size_t size, int flags) { static atomic64_t dmabuf_inode = ATOMIC64_INIT(0); - struct file *file; struct inode *inode = alloc_anon_inode(dma_buf_mnt->mnt_sb); + struct file *file; if (IS_ERR(inode)) return ERR_CAST(inode); - inode->i_size = dmabuf->size; - inode_set_bytes(inode, dmabuf->size); + inode->i_size = size; + inode_set_bytes(inode, size); /* * The ->i_ino acquired from get_next_ino() is not unique thus @@ -552,8 +553,6 @@ static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags) flags, &dma_buf_fops); if (IS_ERR(file)) goto err_alloc_file; - file->private_data = dmabuf; - file->f_path.dentry->d_fsdata = dmabuf; return file; @@ -619,19 +618,11 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) size_t alloc_size = sizeof(struct dma_buf); int ret; - if 
(!exp_info->resv) - alloc_size += sizeof(struct dma_resv); - else - /* prevent &dma_buf[1] == dma_buf->resv */ - alloc_size += 1; - - if (WARN_ON(!exp_info->priv - || !exp_info->ops - || !exp_info->ops->map_dma_buf - || !exp_info->ops->unmap_dma_buf - || !exp_info->ops->release)) { + if (WARN_ON(!exp_info->priv || !exp_info->ops + || !exp_info->ops->map_dma_buf + || !exp_info->ops->unmap_dma_buf + || !exp_info->ops->release)) return ERR_PTR(-EINVAL); - } if (WARN_ON(exp_info->ops->cache_sgt_mapping && (exp_info->ops->pin || exp_info->ops->unpin))) @@ -643,10 +634,21 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) if (!try_module_get(exp_info->owner)) return ERR_PTR(-ENOENT); + file = dma_buf_getfile(exp_info->size, exp_info->flags); + if (IS_ERR(file)) { + ret = PTR_ERR(file); + goto err_module; + } + + if (!exp_info->resv) + alloc_size += sizeof(struct dma_resv); + else + /* prevent &dma_buf[1] == dma_buf->resv */ + alloc_size += 1; dmabuf = kzalloc(alloc_size, GFP_KERNEL); if (!dmabuf) { ret = -ENOMEM; - goto err_module; + goto err_file; } dmabuf->priv = exp_info->priv; @@ -658,43 +660,35 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) init_waitqueue_head(&dmabuf->poll); dmabuf->cb_in.poll = dmabuf->cb_out.poll = &dmabuf->poll; dmabuf->cb_in.active = dmabuf->cb_out.active = 0; + INIT_LIST_HEAD(&dmabuf->attachments); if (!resv) { - resv = (struct dma_resv *)&dmabuf[1]; - dma_resv_init(resv); + dmabuf->resv = (struct dma_resv *)&dmabuf[1]; + dma_resv_init(dmabuf->resv); + } else { + dmabuf->resv = resv; } - dmabuf->resv = resv; - file = dma_buf_getfile(dmabuf, exp_info->flags); - if (IS_ERR(file)) { - ret = PTR_ERR(file); + ret = dma_buf_stats_setup(dmabuf, file); + if (ret) goto err_dmabuf; - } + file->private_data = dmabuf; + file->f_path.dentry->d_fsdata = dmabuf; dmabuf->file = file; - INIT_LIST_HEAD(&dmabuf->attachments); - mutex_lock(&db_list.lock); list_add(&dmabuf->list_node, &db_list.head); 
mutex_unlock(&db_list.lock); - ret = dma_buf_stats_setup(dmabuf); - if (ret) - goto err_sysfs; - return dmabuf; -err_sysfs: - /* - * Set file->f_path.dentry->d_fsdata to NULL so that when - * dma_buf_release() gets invoked by dentry_ops, it exits - * early before calling the release() dma_buf op. - */ - file->f_path.dentry->d_fsdata = NULL; - fput(file); err_dmabuf: + if (!resv) + dma_resv_fini(dmabuf->resv); kfree(dmabuf); +err_file: + fput(file); err_module: module_put(exp_info->owner); return ERR_PTR(ret); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 6b74df446694..e3e2e6e3b485 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -195,6 +195,7 @@ extern int amdgpu_emu_mode; extern uint amdgpu_smu_memory_pool_size; extern int amdgpu_smu_pptable_id; extern uint amdgpu_dc_feature_mask; +extern uint amdgpu_freesync_vid_mode; extern uint amdgpu_dc_debug_mask; extern uint amdgpu_dc_visual_confirm; extern uint amdgpu_dm_abm_level; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b4f2d61ea0d5..1353ffd08988 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -181,6 +181,7 @@ int amdgpu_mes_kiq; int amdgpu_noretry = -1; int amdgpu_force_asic_type = -1; int amdgpu_tmz = -1; /* auto */ +uint amdgpu_freesync_vid_mode; int amdgpu_reset_method = -1; /* auto */ int amdgpu_num_kcq = -1; int amdgpu_smartshift_bias; @@ -880,6 +881,32 @@ MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto (default), 0 = off, 1 = on) module_param_named(tmz, amdgpu_tmz, int, 0444); /** + * DOC: freesync_video (uint) + * Enable the optimization to adjust front porch timing to achieve seamless + * mode change experience when setting a freesync supported mode for which full + * modeset is not needed. 
+ * + * The Display Core will add a set of modes derived from the base FreeSync + * video mode into the corresponding connector's mode list based on commonly + * used refresh rates and VRR range of the connected display, when users enable + * this feature. From the userspace perspective, they can see a seamless mode + * change experience when the change between different refresh rates under the + * same resolution. Additionally, userspace applications such as Video playback + * can read this modeset list and change the refresh rate based on the video + * frame rate. Finally, the userspace can also derive an appropriate mode for a + * particular refresh rate based on the FreeSync Mode and add it to the + * connector's mode list. + * + * Note: This is an experimental feature. + * + * The default value: 0 (off). + */ +MODULE_PARM_DESC( + freesync_video, + "Enable freesync modesetting optimization feature (0 = off (default), 1 = on)"); +module_param_named(freesync_video, amdgpu_freesync_vid_mode, uint, 0444); + +/** * DOC: reset_method (int) * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco) */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index bceb1a5b2518..3fdaba56be6f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -801,7 +801,7 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, p2plink->attr.name = "properties"; p2plink->attr.mode = KFD_SYSFS_FILE_MODE; - sysfs_attr_init(&iolink->attr); + sysfs_attr_init(&p2plink->attr); ret = sysfs_create_file(p2plink->kobj, &p2plink->attr); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 86bc23a67d97..1b7f20a9d4ae 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5835,7 +5835,8 @@ 
create_stream_for_sink(struct amdgpu_dm_connector *aconnector, */ DRM_DEBUG_DRIVER("No preferred mode found\n"); } else { - recalculate_timing = is_freesync_video_mode(&mode, aconnector); + recalculate_timing = amdgpu_freesync_vid_mode && + is_freesync_video_mode(&mode, aconnector); if (recalculate_timing) { freesync_mode = get_highest_refresh_rate_mode(aconnector, false); drm_mode_copy(&saved_mode, &mode); @@ -6986,7 +6987,7 @@ static void amdgpu_dm_connector_add_freesync_modes(struct drm_connector *connect struct amdgpu_dm_connector *amdgpu_dm_connector = to_amdgpu_dm_connector(connector); - if (!edid) + if (!(amdgpu_freesync_vid_mode && edid)) return; if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10) @@ -8850,7 +8851,8 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, * TODO: Refactor this function to allow this check to work * in all conditions. */ - if (dm_new_crtc_state->stream && + if (amdgpu_freesync_vid_mode && + dm_new_crtc_state->stream && is_timing_unchanged_for_freesync(new_crtc_state, old_crtc_state)) goto skip_modeset; @@ -8885,7 +8887,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, if (!dm_old_crtc_state->stream) goto skip_modeset; - if (dm_new_crtc_state->stream && + if (amdgpu_freesync_vid_mode && dm_new_crtc_state->stream && is_timing_unchanged_for_freesync(new_crtc_state, old_crtc_state)) { new_crtc_state->mode_changed = false; @@ -8897,7 +8899,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, set_freesync_fixed_config(dm_new_crtc_state); goto skip_modeset; - } else if (aconnector && + } else if (amdgpu_freesync_vid_mode && aconnector && is_freesync_video_mode(&new_crtc_state->mode, aconnector)) { struct drm_display_mode *high_mode; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index 5af601cff1a0..b53feeaf5cf1 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -6257,12 +6257,12 @@ bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurface double SwathSizePerSurfaceC[DC__NUM_DPP__MAX]; bool NotEnoughDETSwathFillLatencyHiding = false; - /* calculate sum of single swath size for all pipes in bytes*/ + /* calculate sum of single swath size for all pipes in bytes */ for (k = 0; k < NumberOfActiveSurfaces; k++) { - SwathSizePerSurfaceY[k] += SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k]; + SwathSizePerSurfaceY[k] = SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k]; if (SwathHeightC[k] != 0) - SwathSizePerSurfaceC[k] += SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k]; + SwathSizePerSurfaceC[k] = SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k]; else SwathSizePerSurfaceC[k] = 0; diff --git a/drivers/gpu/drm/i915/gvt/debugfs.c b/drivers/gpu/drm/i915/gvt/debugfs.c index 9f1c209d9251..0616b73175f3 100644 --- a/drivers/gpu/drm/i915/gvt/debugfs.c +++ b/drivers/gpu/drm/i915/gvt/debugfs.c @@ -151,6 +151,22 @@ DEFINE_SIMPLE_ATTRIBUTE(vgpu_scan_nonprivbb_fops, vgpu_scan_nonprivbb_get, vgpu_scan_nonprivbb_set, "0x%llx\n"); +static int vgpu_status_get(void *data, u64 *val) +{ + struct intel_vgpu *vgpu = (struct intel_vgpu *)data; + + *val = 0; + + if (test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status)) + *val |= (1 << INTEL_VGPU_STATUS_ATTACHED); + if (test_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status)) + *val |= (1 << INTEL_VGPU_STATUS_ACTIVE); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(vgpu_status_fops, vgpu_status_get, NULL, "0x%llx\n"); + /** * intel_gvt_debugfs_add_vgpu - register debugfs entries for a vGPU * @vgpu: a vGPU @@ -162,11 +178,12 @@ void intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu) snprintf(name, 16, "vgpu%d", vgpu->id); vgpu->debugfs = debugfs_create_dir(name, 
vgpu->gvt->debugfs_root); - debugfs_create_bool("active", 0444, vgpu->debugfs, &vgpu->active); debugfs_create_file("mmio_diff", 0444, vgpu->debugfs, vgpu, &vgpu_mmio_diff_fops); debugfs_create_file("scan_nonprivbb", 0644, vgpu->debugfs, vgpu, &vgpu_scan_nonprivbb_fops); + debugfs_create_file("status", 0644, vgpu->debugfs, vgpu, + &vgpu_status_fops); } /** @@ -175,8 +192,13 @@ void intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu) */ void intel_gvt_debugfs_remove_vgpu(struct intel_vgpu *vgpu) { - debugfs_remove_recursive(vgpu->debugfs); - vgpu->debugfs = NULL; + struct intel_gvt *gvt = vgpu->gvt; + struct drm_minor *minor = gvt->gt->i915->drm.primary; + + if (minor->debugfs_root && gvt->debugfs_root) { + debugfs_remove_recursive(vgpu->debugfs); + vgpu->debugfs = NULL; + } } /** @@ -199,6 +221,10 @@ void intel_gvt_debugfs_init(struct intel_gvt *gvt) */ void intel_gvt_debugfs_clean(struct intel_gvt *gvt) { - debugfs_remove_recursive(gvt->debugfs_root); - gvt->debugfs_root = NULL; + struct drm_minor *minor = gvt->gt->i915->drm.primary; + + if (minor->debugfs_root) { + debugfs_remove_recursive(gvt->debugfs_root); + gvt->debugfs_root = NULL; + } } diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index 355f1c0e8664..ffe41e9be04f 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -134,7 +134,8 @@ static void dmabuf_gem_object_free(struct kref *kref) struct list_head *pos; struct intel_vgpu_dmabuf_obj *dmabuf_obj; - if (vgpu && vgpu->active && !list_empty(&vgpu->dmabuf_obj_list_head)) { + if (vgpu && test_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status) && + !list_empty(&vgpu->dmabuf_obj_list_head)) { list_for_each(pos, &vgpu->dmabuf_obj_list_head) { dmabuf_obj = list_entry(pos, struct intel_vgpu_dmabuf_obj, list); if (dmabuf_obj == obj) { diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 51e5e8fb505b..4ec85308379a 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ 
b/drivers/gpu/drm/i915/gvt/gtt.c @@ -55,7 +55,7 @@ static bool intel_gvt_is_valid_gfn(struct intel_vgpu *vgpu, unsigned long gfn) int idx; bool ret; - if (!vgpu->attached) + if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status)) return false; idx = srcu_read_lock(&kvm->srcu); @@ -1178,7 +1178,7 @@ static int is_2MB_gtt_possible(struct intel_vgpu *vgpu, if (!HAS_PAGE_SIZES(vgpu->gvt->gt->i915, I915_GTT_PAGE_SIZE_2M)) return 0; - if (!vgpu->attached) + if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status)) return -EINVAL; pfn = gfn_to_pfn(vgpu->vfio_device.kvm, ops->get_pfn(entry)); if (is_error_noslot_pfn(pfn)) @@ -1209,10 +1209,8 @@ static int split_2MB_gtt_entry(struct intel_vgpu *vgpu, for_each_shadow_entry(sub_spt, &sub_se, sub_index) { ret = intel_gvt_dma_map_guest_page(vgpu, start_gfn + sub_index, PAGE_SIZE, &dma_addr); - if (ret) { - ppgtt_invalidate_spt(spt); - return ret; - } + if (ret) + goto err; sub_se.val64 = se->val64; /* Copy the PAT field from PDE. */ @@ -1231,6 +1229,17 @@ static int split_2MB_gtt_entry(struct intel_vgpu *vgpu, ops->set_pfn(se, sub_spt->shadow_page.mfn); ppgtt_set_shadow_entry(spt, se, index); return 0; +err: + /* Cancel the existing addess mappings of DMA addr. */ + for_each_present_shadow_entry(sub_spt, &sub_se, sub_index) { + gvt_vdbg_mm("invalidate 4K entry\n"); + ppgtt_invalidate_pte(sub_spt, &sub_se); + } + /* Release the new allocated spt. 
*/ + trace_spt_change(sub_spt->vgpu->id, "release", sub_spt, + sub_spt->guest_page.gfn, sub_spt->shadow_page.type); + ppgtt_free_spt(sub_spt); + return ret; } static int split_64KB_gtt_entry(struct intel_vgpu *vgpu, diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 62823c0e13ab..2d65800d8e93 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -172,13 +172,18 @@ struct intel_vgpu_submission { #define KVMGT_DEBUGFS_FILENAME "kvmgt_nr_cache_entries" +enum { + INTEL_VGPU_STATUS_ATTACHED = 0, + INTEL_VGPU_STATUS_ACTIVE, + INTEL_VGPU_STATUS_NR_BITS, +}; + struct intel_vgpu { struct vfio_device vfio_device; struct intel_gvt *gvt; struct mutex vgpu_lock; int id; - bool active; - bool attached; + DECLARE_BITMAP(status, INTEL_VGPU_STATUS_NR_BITS); bool pv_notified; bool failsafe; unsigned int resetting_eng; @@ -467,7 +472,7 @@ void intel_vgpu_write_fence(struct intel_vgpu *vgpu, #define for_each_active_vgpu(gvt, vgpu, id) \ idr_for_each_entry((&(gvt)->vgpu_idr), (vgpu), (id)) \ - for_each_if(vgpu->active) + for_each_if(test_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status)) static inline void intel_vgpu_write_pci_bar(struct intel_vgpu *vgpu, u32 offset, u32 val, bool low) @@ -725,7 +730,7 @@ static inline bool intel_gvt_mmio_is_cmd_write_patch( static inline int intel_gvt_read_gpa(struct intel_vgpu *vgpu, unsigned long gpa, void *buf, unsigned long len) { - if (!vgpu->attached) + if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status)) return -ESRCH; return vfio_dma_rw(&vgpu->vfio_device, gpa, buf, len, false); } @@ -743,7 +748,7 @@ static inline int intel_gvt_read_gpa(struct intel_vgpu *vgpu, unsigned long gpa, static inline int intel_gvt_write_gpa(struct intel_vgpu *vgpu, unsigned long gpa, void *buf, unsigned long len) { - if (!vgpu->attached) + if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status)) return -ESRCH; return vfio_dma_rw(&vgpu->vfio_device, gpa, buf, len, true); } diff --git 
a/drivers/gpu/drm/i915/gvt/interrupt.c b/drivers/gpu/drm/i915/gvt/interrupt.c index a6b2021b665f..68eca023bbc6 100644 --- a/drivers/gpu/drm/i915/gvt/interrupt.c +++ b/drivers/gpu/drm/i915/gvt/interrupt.c @@ -433,7 +433,7 @@ static int inject_virtual_interrupt(struct intel_vgpu *vgpu) * enabled by guest. so if msi_trigger is null, success is still * returned and don't inject interrupt into guest. */ - if (!vgpu->attached) + if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status)) return -ESRCH; if (vgpu->msi_trigger && eventfd_signal(vgpu->msi_trigger, 1) != 1) return -EFAULT; diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index f5451adcd489..8ae7039b3683 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -638,7 +638,7 @@ static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu) mutex_lock(&vgpu->gvt->lock); for_each_active_vgpu(vgpu->gvt, itr, id) { - if (!itr->attached) + if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, itr->status)) continue; if (vgpu->vfio_device.kvm == itr->vfio_device.kvm) { @@ -655,9 +655,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev) { struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); - if (vgpu->attached) - return -EEXIST; - if (!vgpu->vfio_device.kvm || vgpu->vfio_device.kvm->mm != current->mm) { gvt_vgpu_err("KVM is required to use Intel vGPU\n"); @@ -667,14 +664,14 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev) if (__kvmgt_vgpu_exist(vgpu)) return -EEXIST; - vgpu->attached = true; - vgpu->track_node.track_write = kvmgt_page_track_write; vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot; kvm_get_kvm(vgpu->vfio_device.kvm); kvm_page_track_register_notifier(vgpu->vfio_device.kvm, &vgpu->track_node); + set_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status); + debugfs_create_ulong(KVMGT_DEBUGFS_FILENAME, 0444, vgpu->debugfs, &vgpu->nr_cache_entries); @@ -698,11 +695,10 @@ static void intel_vgpu_close_device(struct vfio_device 
*vfio_dev) { struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); - if (!vgpu->attached) - return; - intel_gvt_release_vgpu(vgpu); + clear_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status); + debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs)); kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm, @@ -718,8 +714,6 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev) vgpu->dma_addr_cache = RB_ROOT; intel_vgpu_release_msi_eventfd_ctx(vgpu); - - vgpu->attached = false; } static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar) @@ -1512,9 +1506,6 @@ static void intel_vgpu_remove(struct mdev_device *mdev) { struct intel_vgpu *vgpu = dev_get_drvdata(&mdev->dev); - if (WARN_ON_ONCE(vgpu->attached)) - return; - vfio_unregister_group_dev(&vgpu->vfio_device); vfio_put_device(&vgpu->vfio_device); } @@ -1559,7 +1550,7 @@ int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn) struct kvm_memory_slot *slot; int idx; - if (!info->attached) + if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, info->status)) return -ESRCH; idx = srcu_read_lock(&kvm->srcu); @@ -1589,8 +1580,8 @@ int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn) struct kvm_memory_slot *slot; int idx; - if (!info->attached) - return 0; + if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, info->status)) + return -ESRCH; idx = srcu_read_lock(&kvm->srcu); slot = gfn_to_memslot(kvm, gfn); @@ -1668,7 +1659,7 @@ int intel_gvt_dma_map_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, struct gvt_dma *entry; int ret; - if (!vgpu->attached) + if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status)) return -EINVAL; mutex_lock(&vgpu->cache_lock); @@ -1714,8 +1705,8 @@ int intel_gvt_dma_pin_guest_page(struct intel_vgpu *vgpu, dma_addr_t dma_addr) struct gvt_dma *entry; int ret = 0; - if (!vgpu->attached) - return -ENODEV; + if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status)) + return -EINVAL; mutex_lock(&vgpu->cache_lock); entry = __gvt_cache_find_dma_addr(vgpu, 
dma_addr); @@ -1742,7 +1733,7 @@ void intel_gvt_dma_unmap_guest_page(struct intel_vgpu *vgpu, { struct gvt_dma *entry; - if (!vgpu->attached) + if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status)) return; mutex_lock(&vgpu->cache_lock); @@ -1778,7 +1769,7 @@ static void intel_gvt_test_and_emulate_vblank(struct intel_gvt *gvt) idr_for_each_entry((&(gvt)->vgpu_idr), (vgpu), (id)) { if (test_and_clear_bit(INTEL_GVT_REQUEST_EMULATE_VBLANK + id, (void *)&gvt->service_request)) { - if (vgpu->active) + if (test_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status)) intel_vgpu_emulate_vblank(vgpu); } } diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 9cd8fcbf7cad..f4055804aad1 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -695,6 +695,7 @@ intel_vgpu_shadow_mm_pin(struct intel_vgpu_workload *workload) if (workload->shadow_mm->type != INTEL_GVT_MM_PPGTT || !workload->shadow_mm->ppgtt_mm.shadowed) { + intel_vgpu_unpin_mm(workload->shadow_mm); gvt_vgpu_err("workload shadow ppgtt isn't ready\n"); return -EINVAL; } @@ -865,7 +866,8 @@ pick_next_workload(struct intel_gvt *gvt, struct intel_engine_cs *engine) goto out; } - if (!scheduler->current_vgpu->active || + if (!test_bit(INTEL_VGPU_STATUS_ACTIVE, + scheduler->current_vgpu->status) || list_empty(workload_q_head(scheduler->current_vgpu, engine))) goto out; diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 3c529c2705dd..a5497440484f 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -166,9 +166,7 @@ void intel_gvt_clean_vgpu_types(struct intel_gvt *gvt) */ void intel_gvt_activate_vgpu(struct intel_vgpu *vgpu) { - mutex_lock(&vgpu->vgpu_lock); - vgpu->active = true; - mutex_unlock(&vgpu->vgpu_lock); + set_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status); } /** @@ -183,7 +181,7 @@ void intel_gvt_deactivate_vgpu(struct intel_vgpu *vgpu) { mutex_lock(&vgpu->vgpu_lock); - 
vgpu->active = false; + clear_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status); if (atomic_read(&vgpu->submission.running_workload_num)) { mutex_unlock(&vgpu->vgpu_lock); @@ -228,7 +226,8 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu) struct intel_gvt *gvt = vgpu->gvt; struct drm_i915_private *i915 = gvt->gt->i915; - drm_WARN(&i915->drm, vgpu->active, "vGPU is still active!\n"); + drm_WARN(&i915->drm, test_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status), + "vGPU is still active!\n"); /* * remove idr first so later clean can judge if need to stop @@ -285,8 +284,7 @@ struct intel_vgpu *intel_gvt_create_idle_vgpu(struct intel_gvt *gvt) if (ret) goto out_free_vgpu; - vgpu->active = false; - + clear_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status); return vgpu; out_free_vgpu: diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c index dba4f7d81d69..80142d9a4a55 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.c +++ b/drivers/gpu/drm/imx/ipuv3-plane.c @@ -614,6 +614,11 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, break; } + if (ipu_plane->dp_flow == IPU_DP_FLOW_SYNC_BG) + width = ipu_src_rect_width(new_state); + else + width = drm_rect_width(&new_state->src) >> 16; + eba = drm_plane_state_to_eba(new_state, 0); /* @@ -622,8 +627,7 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, */ if (ipu_state->use_pre) { axi_id = ipu_chan_assign_axi_id(ipu_plane->dma); - ipu_prg_channel_configure(ipu_plane->ipu_ch, axi_id, - ipu_src_rect_width(new_state), + ipu_prg_channel_configure(ipu_plane->ipu_ch, axi_id, width, drm_rect_height(&new_state->src) >> 16, fb->pitches[0], fb->format->format, fb->modifier, &eba); @@ -678,9 +682,8 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, break; } - ipu_dmfc_config_wait4eot(ipu_plane->dmfc, ALIGN(drm_rect_width(dst), 8)); + ipu_dmfc_config_wait4eot(ipu_plane->dmfc, width); - width = ipu_src_rect_width(new_state); height = drm_rect_height(&new_state->src) >> 16; info = 
drm_format_info(fb->format->format); ipu_calculate_bursts(width, info->cpp[0], fb->pitches[0], @@ -744,8 +747,7 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, ipu_cpmem_set_burstsize(ipu_plane->ipu_ch, 16); ipu_cpmem_zero(ipu_plane->alpha_ch); - ipu_cpmem_set_resolution(ipu_plane->alpha_ch, - ipu_src_rect_width(new_state), + ipu_cpmem_set_resolution(ipu_plane->alpha_ch, width, drm_rect_height(&new_state->src) >> 16); ipu_cpmem_set_format_passthrough(ipu_plane->alpha_ch, 8); ipu_cpmem_set_high_priority(ipu_plane->alpha_ch); diff --git a/drivers/gpu/drm/meson/meson_viu.c b/drivers/gpu/drm/meson/meson_viu.c index d4b907889a21..cd399b0b7181 100644 --- a/drivers/gpu/drm/meson/meson_viu.c +++ b/drivers/gpu/drm/meson/meson_viu.c @@ -436,15 +436,14 @@ void meson_viu_init(struct meson_drm *priv) /* Initialize OSD1 fifo control register */ reg = VIU_OSD_DDR_PRIORITY_URGENT | - VIU_OSD_HOLD_FIFO_LINES(31) | VIU_OSD_FIFO_DEPTH_VAL(32) | /* fifo_depth_val: 32*8=256 */ VIU_OSD_WORDS_PER_BURST(4) | /* 4 words in 1 burst */ VIU_OSD_FIFO_LIMITS(2); /* fifo_lim: 2*16=32 */ if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A)) - reg |= VIU_OSD_BURST_LENGTH_32; + reg |= (VIU_OSD_BURST_LENGTH_32 | VIU_OSD_HOLD_FIFO_LINES(31)); else - reg |= VIU_OSD_BURST_LENGTH_64; + reg |= (VIU_OSD_BURST_LENGTH_64 | VIU_OSD_HOLD_FIFO_LINES(4)); writel_relaxed(reg, priv->io_base + _REG(VIU_OSD1_FIFO_CTRL_STAT)); writel_relaxed(reg, priv->io_base + _REG(VIU_OSD2_FIFO_CTRL_STAT)); diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 2fa5afe21288..919e6cc04982 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -82,6 +82,7 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data, struct panfrost_gem_object *bo; struct drm_panfrost_create_bo *args = data; struct panfrost_gem_mapping *mapping; + int ret; if (!args->size || args->pad || (args->flags & ~(PANFROST_BO_NOEXEC | 
PANFROST_BO_HEAP))) @@ -92,21 +93,29 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data, !(args->flags & PANFROST_BO_NOEXEC)) return -EINVAL; - bo = panfrost_gem_create_with_handle(file, dev, args->size, args->flags, - &args->handle); + bo = panfrost_gem_create(dev, args->size, args->flags); if (IS_ERR(bo)) return PTR_ERR(bo); + ret = drm_gem_handle_create(file, &bo->base.base, &args->handle); + if (ret) + goto out; + mapping = panfrost_gem_mapping_get(bo, priv); - if (!mapping) { - drm_gem_object_put(&bo->base.base); - return -EINVAL; + if (mapping) { + args->offset = mapping->mmnode.start << PAGE_SHIFT; + panfrost_gem_mapping_put(mapping); + } else { + /* This can only happen if the handle from + * drm_gem_handle_create() has already been guessed and freed + * by user space + */ + ret = -EINVAL; } - args->offset = mapping->mmnode.start << PAGE_SHIFT; - panfrost_gem_mapping_put(mapping); - - return 0; +out: + drm_gem_object_put(&bo->base.base); + return ret; } /** diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c index 293e799e2fe8..3c812fbd126f 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c @@ -235,12 +235,8 @@ struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t } struct panfrost_gem_object * -panfrost_gem_create_with_handle(struct drm_file *file_priv, - struct drm_device *dev, size_t size, - u32 flags, - uint32_t *handle) +panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags) { - int ret; struct drm_gem_shmem_object *shmem; struct panfrost_gem_object *bo; @@ -256,16 +252,6 @@ panfrost_gem_create_with_handle(struct drm_file *file_priv, bo->noexec = !!(flags & PANFROST_BO_NOEXEC); bo->is_heap = !!(flags & PANFROST_BO_HEAP); - /* - * Allocate an id of idr table where the obj is registered - * and handle has the id what user can see. 
- */ - ret = drm_gem_handle_create(file_priv, &shmem->base, handle); - /* drop reference from allocate - handle holds it now. */ - drm_gem_object_put(&shmem->base); - if (ret) - return ERR_PTR(ret); - return bo; } diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h index 8088d5fd8480..ad2877eeeccd 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.h +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h @@ -69,10 +69,7 @@ panfrost_gem_prime_import_sg_table(struct drm_device *dev, struct sg_table *sgt); struct panfrost_gem_object * -panfrost_gem_create_with_handle(struct drm_file *file_priv, - struct drm_device *dev, size_t size, - u32 flags, - uint32_t *handle); +panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags); int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_priv); void panfrost_gem_close(struct drm_gem_object *obj, diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index fe09e5be79bd..15d04a0ec623 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -81,7 +81,7 @@ int drm_sched_entity_init(struct drm_sched_entity *entity, init_completion(&entity->entity_idle); /* We start in an idle state. 
*/ - complete(&entity->entity_idle); + complete_all(&entity->entity_idle); spin_lock_init(&entity->rq_lock); spsc_queue_init(&entity->job_queue); diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 31f3a1267be4..fd22d753b4ed 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -987,7 +987,7 @@ static int drm_sched_main(void *param) sched_job = drm_sched_entity_pop_job(entity); if (!sched_job) { - complete(&entity->entity_idle); + complete_all(&entity->entity_idle); continue; } @@ -998,7 +998,7 @@ static int drm_sched_main(void *param) trace_drm_run_job(sched_job, entity); fence = sched->ops->run_job(sched_job); - complete(&entity->entity_idle); + complete_all(&entity->entity_idle); drm_sched_fence_scheduled(s_fence); if (!IS_ERR_OR_NULL(fence)) { diff --git a/drivers/gpu/drm/tests/Makefile b/drivers/gpu/drm/tests/Makefile index b29ef1085cad..f896ef85c2f2 100644 --- a/drivers/gpu/drm/tests/Makefile +++ b/drivers/gpu/drm/tests/Makefile @@ -12,3 +12,5 @@ obj-$(CONFIG_DRM_KUNIT_TEST) += \ drm_mm_test.o \ drm_plane_helper_test.o \ drm_rect_test.o + +CFLAGS_drm_mm_test.o := $(DISABLE_STRUCTLEAK_PLUGIN) diff --git a/drivers/gpu/drm/tests/drm_mm_test.c b/drivers/gpu/drm/tests/drm_mm_test.c index 89f12d3b4a21..186b28dc7038 100644 --- a/drivers/gpu/drm/tests/drm_mm_test.c +++ b/drivers/gpu/drm/tests/drm_mm_test.c @@ -298,9 +298,9 @@ static bool expect_reserve_fail(struct kunit *test, struct drm_mm *mm, struct dr return false; } -static bool check_reserve_boundaries(struct kunit *test, struct drm_mm *mm, - unsigned int count, - u64 size) +static bool noinline_for_stack check_reserve_boundaries(struct kunit *test, struct drm_mm *mm, + unsigned int count, + u64 size) { const struct boundary { u64 start, size; diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c index 8d7728181de0..c7e74cf13022 100644 --- 
a/drivers/gpu/drm/virtio/virtgpu_object.c +++ b/drivers/gpu/drm/virtio/virtgpu_object.c @@ -184,7 +184,7 @@ int virtio_gpu_object_create(struct virtio_gpu_device *vgdev, struct virtio_gpu_object_array *objs = NULL; struct drm_gem_shmem_object *shmem_obj; struct virtio_gpu_object *bo; - struct virtio_gpu_mem_entry *ents; + struct virtio_gpu_mem_entry *ents = NULL; unsigned int nents; int ret; @@ -210,7 +210,7 @@ int virtio_gpu_object_create(struct virtio_gpu_device *vgdev, ret = -ENOMEM; objs = virtio_gpu_array_alloc(1); if (!objs) - goto err_put_id; + goto err_free_entry; virtio_gpu_array_add_obj(objs, &bo->base.base); ret = virtio_gpu_array_lock_resv(objs); @@ -239,6 +239,8 @@ int virtio_gpu_object_create(struct virtio_gpu_device *vgdev, err_put_objs: virtio_gpu_array_put_free(objs); +err_free_entry: + kvfree(ents); err_put_id: virtio_gpu_resource_id_put(vgdev, bo->hw_res_handle); err_free_gem: diff --git a/drivers/gpu/drm/xen/xen_drm_front.c b/drivers/gpu/drm/xen/xen_drm_front.c index 0d8e6bd1ccbf..90996c108146 100644 --- a/drivers/gpu/drm/xen/xen_drm_front.c +++ b/drivers/gpu/drm/xen/xen_drm_front.c @@ -717,7 +717,7 @@ static int xen_drv_probe(struct xenbus_device *xb_dev, return xenbus_switch_state(xb_dev, XenbusStateInitialising); } -static int xen_drv_remove(struct xenbus_device *dev) +static void xen_drv_remove(struct xenbus_device *dev) { struct xen_drm_front_info *front_info = dev_get_drvdata(&dev->dev); int to = 100; @@ -751,7 +751,6 @@ static int xen_drv_remove(struct xenbus_device *dev) xen_drm_drv_fini(front_info); xenbus_frontend_closed(dev); - return 0; } static const struct xenbus_device_id xen_driver_ids[] = { diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index 945758f39523..3e1272695d99 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -278,7 +278,6 @@ static int do_get_hw_stats(struct ib_device *ibdev, const struct mlx5_ib_counters *cnts = 
get_counters(dev, port_num - 1); struct mlx5_core_dev *mdev; int ret, num_counters; - u32 mdev_port_num; if (!stats) return -EINVAL; @@ -299,8 +298,9 @@ static int do_get_hw_stats(struct ib_device *ibdev, } if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { - mdev = mlx5_ib_get_native_port_mdev(dev, port_num, - &mdev_port_num); + if (!port_num) + port_num = 1; + mdev = mlx5_ib_get_native_port_mdev(dev, port_num, NULL); if (!mdev) { /* If port is not affiliated yet, its in down state * which doesn't have any counters yet, so it would be diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 40d9410ec303..cf953d23d18d 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4502,6 +4502,40 @@ static bool mlx5_ib_modify_qp_allowed(struct mlx5_ib_dev *dev, return false; } +static int validate_rd_atomic(struct mlx5_ib_dev *dev, struct ib_qp_attr *attr, + int attr_mask, enum ib_qp_type qp_type) +{ + int log_max_ra_res; + int log_max_ra_req; + + if (qp_type == MLX5_IB_QPT_DCI) { + log_max_ra_res = 1 << MLX5_CAP_GEN(dev->mdev, + log_max_ra_res_dc); + log_max_ra_req = 1 << MLX5_CAP_GEN(dev->mdev, + log_max_ra_req_dc); + } else { + log_max_ra_res = 1 << MLX5_CAP_GEN(dev->mdev, + log_max_ra_res_qp); + log_max_ra_req = 1 << MLX5_CAP_GEN(dev->mdev, + log_max_ra_req_qp); + } + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && + attr->max_rd_atomic > log_max_ra_res) { + mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n", + attr->max_rd_atomic); + return false; + } + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && + attr->max_dest_rd_atomic > log_max_ra_req) { + mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n", + attr->max_dest_rd_atomic); + return false; + } + return true; +} + int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { @@ -4589,21 +4623,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; } - if (attr_mask & 
IB_QP_MAX_QP_RD_ATOMIC && - attr->max_rd_atomic > - (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp))) { - mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n", - attr->max_rd_atomic); - goto out; - } - - if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && - attr->max_dest_rd_atomic > - (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp))) { - mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n", - attr->max_dest_rd_atomic); + if (!validate_rd_atomic(dev, attr, attr_mask, qp_type)) goto out; - } if (cur_state == new_state && cur_state == IB_QPS_RESET) { err = 0; diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 00b0068fda20..5d94db453df3 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -62,9 +62,6 @@ enum { SRP_DEFAULT_CMD_SQ_SIZE = SRP_DEFAULT_QUEUE_SIZE - SRP_RSP_SQ_SIZE - SRP_TSK_MGMT_SQ_SIZE, - SRP_TAG_NO_REQ = ~0U, - SRP_TAG_TSK_MGMT = 1U << 31, - SRP_MAX_PAGES_PER_MR = 512, SRP_MAX_ADD_CDB_LEN = 16, @@ -79,6 +76,11 @@ enum { sizeof(struct srp_imm_buf), }; +enum { + SRP_TAG_NO_REQ = ~0U, + SRP_TAG_TSK_MGMT = BIT(31), +}; + enum srp_target_state { SRP_TARGET_SCANNING, SRP_TARGET_LIVE, diff --git a/drivers/input/misc/xen-kbdfront.c b/drivers/input/misc/xen-kbdfront.c index 8d8ebdc2039b..67f1c7364c95 100644 --- a/drivers/input/misc/xen-kbdfront.c +++ b/drivers/input/misc/xen-kbdfront.c @@ -51,7 +51,7 @@ module_param_array(ptr_size, int, NULL, 0444); MODULE_PARM_DESC(ptr_size, "Pointing device width, height in pixels (default 800,600)"); -static int xenkbd_remove(struct xenbus_device *); +static void xenkbd_remove(struct xenbus_device *); static int xenkbd_connect_backend(struct xenbus_device *, struct xenkbd_info *); static void xenkbd_disconnect_backend(struct xenkbd_info *); @@ -404,7 +404,7 @@ static int xenkbd_resume(struct xenbus_device *dev) return xenkbd_connect_backend(dev, info); } -static int xenkbd_remove(struct xenbus_device *dev) +static void xenkbd_remove(struct 
xenbus_device *dev) { struct xenkbd_info *info = dev_get_drvdata(&dev->dev); @@ -417,7 +417,6 @@ static int xenkbd_remove(struct xenbus_device *dev) input_unregister_device(info->mtouch); free_page((unsigned long)info->page); kfree(info); - return 0; } static int xenkbd_connect_backend(struct xenbus_device *dev, diff --git a/drivers/md/dm.c b/drivers/md/dm.c index e1ea3a7bd9d9..b424a6ee27ba 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1742,6 +1742,8 @@ static void dm_split_and_process_bio(struct mapped_device *md, * otherwise associated queue_limits won't be imposed. */ bio = bio_split_to_limits(bio); + if (!bio) + return; } init_clone_info(&ci, md, map, bio, is_abnormal); diff --git a/drivers/md/md.c b/drivers/md/md.c index 775f1dde190a..8af639296b3c 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -455,6 +455,8 @@ static void md_submit_bio(struct bio *bio) } bio = bio_split_to_limits(bio); + if (!bio) + return; if (mddev->ro == MD_RDONLY && unlikely(rw == WRITE)) { if (bio_sectors(bio) != 0) diff --git a/drivers/mtd/parsers/scpart.c b/drivers/mtd/parsers/scpart.c index 02601bb33de4..6e5e11c37078 100644 --- a/drivers/mtd/parsers/scpart.c +++ b/drivers/mtd/parsers/scpart.c @@ -50,7 +50,7 @@ static int scpart_scan_partmap(struct mtd_info *master, loff_t partmap_offs, int cnt = 0; int res = 0; int res2; - loff_t offs; + uint32_t offs; size_t retlen; struct sc_part_desc *pdesc = NULL; struct sc_part_desc *tmpdesc; diff --git a/drivers/mtd/parsers/tplink_safeloader.c b/drivers/mtd/parsers/tplink_safeloader.c index f601e7bd8627..1c689dafca2a 100644 --- a/drivers/mtd/parsers/tplink_safeloader.c +++ b/drivers/mtd/parsers/tplink_safeloader.c @@ -91,7 +91,7 @@ static int mtd_parser_tplink_safeloader_parse(struct mtd_info *mtd, buf = mtd_parser_tplink_safeloader_read_table(mtd); if (!buf) { err = -ENOENT; - goto err_out; + goto err_free_parts; } for (idx = 0, offset = TPLINK_SAFELOADER_DATA_OFFSET; @@ -118,6 +118,8 @@ static int 
mtd_parser_tplink_safeloader_parse(struct mtd_info *mtd, err_free: for (idx -= 1; idx >= 0; idx--) kfree(parts[idx].name); +err_free_parts: + kfree(parts); err_out: return err; }; diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c index d8703d7dfd0a..d67c926bca8b 100644 --- a/drivers/mtd/spi-nor/core.c +++ b/drivers/mtd/spi-nor/core.c @@ -10,6 +10,7 @@ #include <linux/err.h> #include <linux/errno.h> #include <linux/module.h> +#include <linux/delay.h> #include <linux/device.h> #include <linux/mutex.h> #include <linux/math64.h> diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 16ce7a90610c..240a7e8a7652 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -993,7 +993,7 @@ static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp, DMA_ATTR_WEAK_ORDERING); skb = build_skb(page_address(page), PAGE_SIZE); if (!skb) { - __free_page(page); + page_pool_recycle_direct(rxr->page_pool, page); return NULL; } skb_mark_for_recycle(skb); @@ -1031,7 +1031,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp, skb = napi_alloc_skb(&rxr->bnapi->napi, payload); if (!skb) { - __free_page(page); + page_pool_recycle_direct(rxr->page_pool, page); return NULL; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 081bd2c3f289..e84e5be8e59e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -3130,7 +3130,7 @@ static int hclgevf_set_channels(struct hnae3_handle *handle, u32 new_tqps_num, hclgevf_update_rss_size(handle, new_tqps_num); - hclge_comm_get_rss_tc_info(cur_rss_size, hdev->hw_tc_map, + hclge_comm_get_rss_tc_info(kinfo->rss_size, hdev->hw_tc_map, tc_offset, tc_valid, tc_size); ret = hclge_comm_set_rss_tc_mode(&hdev->hw.hw, tc_offset, tc_valid, tc_size); diff --git 
a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index c4e451ef7942..adc02adef83a 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -3850,7 +3850,7 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter, field_flags |= IAVF_CLOUD_FIELD_IIP; } else { dev_err(&adapter->pdev->dev, "Bad ip src mask 0x%08x\n", - be32_to_cpu(match.mask->dst)); + be32_to_cpu(match.mask->src)); return -EINVAL; } } diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.c b/drivers/net/ethernet/intel/ice/ice_gnss.c index b5a7f246d230..43e199b5b513 100644 --- a/drivers/net/ethernet/intel/ice/ice_gnss.c +++ b/drivers/net/ethernet/intel/ice/ice_gnss.c @@ -363,6 +363,7 @@ ice_gnss_tty_write(struct tty_struct *tty, const unsigned char *buf, int count) /* Send the data out to a hardware port */ write_buf = kzalloc(sizeof(*write_buf), GFP_KERNEL); if (!write_buf) { + kfree(cmd_buf); err = -ENOMEM; goto exit; } @@ -460,6 +461,9 @@ static struct tty_driver *ice_gnss_create_tty_driver(struct ice_pf *pf) for (i = 0; i < ICE_GNSS_TTY_MINOR_DEVICES; i++) { pf->gnss_tty_port[i] = kzalloc(sizeof(*pf->gnss_tty_port[i]), GFP_KERNEL); + if (!pf->gnss_tty_port[i]) + goto err_out; + pf->gnss_serial[i] = NULL; tty_port_init(pf->gnss_tty_port[i]); @@ -469,21 +473,23 @@ static struct tty_driver *ice_gnss_create_tty_driver(struct ice_pf *pf) err = tty_register_driver(tty_driver); if (err) { dev_err(dev, "Failed to register TTY driver err=%d\n", err); - - for (i = 0; i < ICE_GNSS_TTY_MINOR_DEVICES; i++) { - tty_port_destroy(pf->gnss_tty_port[i]); - kfree(pf->gnss_tty_port[i]); - } - kfree(ttydrv_name); - tty_driver_kref_put(pf->ice_gnss_tty_driver); - - return NULL; + goto err_out; } for (i = 0; i < ICE_GNSS_TTY_MINOR_DEVICES; i++) dev_info(dev, "%s%d registered\n", ttydrv_name, i); return tty_driver; + +err_out: + while (i--) { + tty_port_destroy(pf->gnss_tty_port[i]); + kfree(pf->gnss_tty_port[i]); + } + 
kfree(ttydrv_name); + tty_driver_kref_put(pf->ice_gnss_tty_driver); + + return NULL; } /** diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index 0e23f6244ffb..9dec3563ce3a 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -475,7 +475,9 @@ #define IGC_TSAUXC_EN_TT0 BIT(0) /* Enable target time 0. */ #define IGC_TSAUXC_EN_TT1 BIT(1) /* Enable target time 1. */ #define IGC_TSAUXC_EN_CLK0 BIT(2) /* Enable Configurable Frequency Clock 0. */ +#define IGC_TSAUXC_ST0 BIT(4) /* Start Clock 0 Toggle on Target Time 0. */ #define IGC_TSAUXC_EN_CLK1 BIT(5) /* Enable Configurable Frequency Clock 1. */ +#define IGC_TSAUXC_ST1 BIT(7) /* Start Clock 1 Toggle on Target Time 1. */ #define IGC_TSAUXC_EN_TS0 BIT(8) /* Enable hardware timestamp 0. */ #define IGC_TSAUXC_AUTT0 BIT(9) /* Auxiliary Timestamp Taken. */ #define IGC_TSAUXC_EN_TS1 BIT(10) /* Enable hardware timestamp 0. */ diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 8dbb9f903ca7..c34734d432e0 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -322,7 +322,7 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, ts = ns_to_timespec64(ns); if (rq->perout.index == 1) { if (use_freq) { - tsauxc_mask = IGC_TSAUXC_EN_CLK1; + tsauxc_mask = IGC_TSAUXC_EN_CLK1 | IGC_TSAUXC_ST1; tsim_mask = 0; } else { tsauxc_mask = IGC_TSAUXC_EN_TT1; @@ -333,7 +333,7 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, freqout = IGC_FREQOUT1; } else { if (use_freq) { - tsauxc_mask = IGC_TSAUXC_EN_CLK0; + tsauxc_mask = IGC_TSAUXC_EN_CLK0 | IGC_TSAUXC_ST0; tsim_mask = 0; } else { tsauxc_mask = IGC_TSAUXC_EN_TT0; @@ -347,10 +347,12 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, tsauxc = rd32(IGC_TSAUXC); tsim = rd32(IGC_TSIM); if (rq->perout.index == 1) { - tsauxc &= 
~(IGC_TSAUXC_EN_TT1 | IGC_TSAUXC_EN_CLK1); + tsauxc &= ~(IGC_TSAUXC_EN_TT1 | IGC_TSAUXC_EN_CLK1 | + IGC_TSAUXC_ST1); tsim &= ~IGC_TSICR_TT1; } else { - tsauxc &= ~(IGC_TSAUXC_EN_TT0 | IGC_TSAUXC_EN_CLK0); + tsauxc &= ~(IGC_TSAUXC_EN_TT0 | IGC_TSAUXC_EN_CLK0 | + IGC_TSAUXC_ST0); tsim &= ~IGC_TSICR_TT0; } if (on) { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index 24aa97f993ca..123dca9ce468 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -855,9 +855,11 @@ static struct pci_dev *ixgbe_get_first_secondary_devfn(unsigned int devfn) rp_pdev = pci_get_domain_bus_and_slot(0, 0, devfn); if (rp_pdev && rp_pdev->subordinate) { bus = rp_pdev->subordinate->number; + pci_dev_put(rp_pdev); return pci_get_domain_bus_and_slot(0, bus, 0); } + pci_dev_put(rp_pdev); return NULL; } @@ -874,6 +876,7 @@ static bool ixgbe_x550em_a_has_mii(struct ixgbe_hw *hw) struct ixgbe_adapter *adapter = hw->back; struct pci_dev *pdev = adapter->pdev; struct pci_dev *func0_pdev; + bool has_mii = false; /* For the C3000 family of SoCs (x550em_a) the internal ixgbe devices * are always downstream of root ports @ 0000:00:16.0 & 0000:00:17.0 @@ -884,15 +887,16 @@ static bool ixgbe_x550em_a_has_mii(struct ixgbe_hw *hw) func0_pdev = ixgbe_get_first_secondary_devfn(PCI_DEVFN(0x16, 0)); if (func0_pdev) { if (func0_pdev == pdev) - return true; - else - return false; + has_mii = true; + goto out; } func0_pdev = ixgbe_get_first_secondary_devfn(PCI_DEVFN(0x17, 0)); if (func0_pdev == pdev) - return true; + has_mii = true; - return false; +out: + pci_dev_put(func0_pdev); + return has_mii; } /** diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index b2b71fe80d61..724df6398bbe 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -774,9 +774,9 @@ int 
cgx_lmac_rx_tx_enable(void *cgxd, int lmac_id, bool enable) cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_CFG); if (enable) - cfg |= CMR_EN | DATA_PKT_RX_EN | DATA_PKT_TX_EN; + cfg |= DATA_PKT_RX_EN | DATA_PKT_TX_EN; else - cfg &= ~(CMR_EN | DATA_PKT_RX_EN | DATA_PKT_TX_EN); + cfg &= ~(DATA_PKT_RX_EN | DATA_PKT_TX_EN); cgx_write(cgx, lmac_id, CGXX_CMRX_CFG, cfg); return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h index fb2d37676d84..5a20d93004c7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h @@ -26,7 +26,6 @@ #define CMR_P2X_SEL_SHIFT 59ULL #define CMR_P2X_SEL_NIX0 1ULL #define CMR_P2X_SEL_NIX1 2ULL -#define CMR_EN BIT_ULL(55) #define DATA_PKT_TX_EN BIT_ULL(53) #define DATA_PKT_RX_EN BIT_ULL(54) #define CGX_LMAC_TYPE_SHIFT 40 diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 86653bb8e403..7f8ffbf79cf7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -758,6 +758,8 @@ static void otx2vf_remove(struct pci_dev *pdev) if (vf->otx2_wq) destroy_workqueue(vf->otx2_wq); otx2_ptp_destroy(vf); + otx2_mcam_flow_del(vf); + otx2_shutdown_tc(vf); otx2vf_disable_mbox_intr(vf); otx2_detach_resources(&vf->mbox); if (test_bit(CN10K_LMTST, &vf->hw.cap_flag)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 24da9c5e63e3..c3c8a7148723 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -2199,15 +2199,9 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) return -EINVAL; } - cmd->stats = kvcalloc(MLX5_CMD_OP_MAX, sizeof(*cmd->stats), GFP_KERNEL); - if (!cmd->stats) - return -ENOMEM; - cmd->pool = dma_pool_create("mlx5_cmd", mlx5_core_dma_dev(dev), size, align, 0); - if (!cmd->pool) { - 
err = -ENOMEM; - goto dma_pool_err; - } + if (!cmd->pool) + return -ENOMEM; err = alloc_cmd_page(dev, cmd); if (err) @@ -2292,8 +2286,6 @@ err_free_page: err_free_pool: dma_pool_destroy(cmd->pool); -dma_pool_err: - kvfree(cmd->stats); return err; } @@ -2306,7 +2298,6 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev) destroy_msg_cache(dev); free_cmd_page(dev, cmd); dma_pool_destroy(cmd->pool); - kvfree(cmd->stats); } void mlx5_cmd_set_state(struct mlx5_core_dev *dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c index 512d43148922..c4378afdec09 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c @@ -34,12 +34,6 @@ static int police_act_validate(const struct flow_action_entry *act, return -EOPNOTSUPP; } - if (act->police.rate_pkt_ps) { - NL_SET_ERR_MSG_MOD(extack, - "QoS offload not support packets per second"); - return -EOPNOTSUPP; - } - return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c index 8d7d761482d2..50b60fd00946 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c @@ -127,6 +127,7 @@ mlx5e_post_meter_add_rule(struct mlx5e_priv *priv, attr->counter = act_counter; attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT; + attr->inner_match_level = MLX5_MATCH_NONE; attr->outer_match_level = MLX5_MATCH_NONE; attr->chain = 0; attr->prio = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c index fd07c4cbfd1d..1f62c702b625 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c @@ -88,6 +88,8 @@ static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[], struct udphdr *udp = 
(struct udphdr *)(buf); struct vxlanhdr *vxh; + if (tun_key->tun_flags & TUNNEL_VXLAN_OPT) + return -EOPNOTSUPP; vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr)); *ip_proto = IPPROTO_UDP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index 9369a580743e..7f6b940830b3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -62,6 +62,7 @@ struct mlx5e_macsec_sa { u32 enc_key_id; u32 next_pn; sci_t sci; + ssci_t ssci; salt_t salt; struct rhash_head hash; @@ -358,7 +359,6 @@ static int mlx5e_macsec_init_sa(struct macsec_context *ctx, struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_macsec_obj_attrs obj_attrs; union mlx5e_macsec_rule *macsec_rule; - struct macsec_key *key; int err; obj_attrs.next_pn = sa->next_pn; @@ -368,13 +368,9 @@ static int mlx5e_macsec_init_sa(struct macsec_context *ctx, obj_attrs.aso_pdn = macsec->aso.pdn; obj_attrs.epn_state = sa->epn_state; - key = (is_tx) ? &ctx->sa.tx_sa->key : &ctx->sa.rx_sa->key; - if (sa->epn_state.epn_enabled) { - obj_attrs.ssci = (is_tx) ? 
cpu_to_be32((__force u32)ctx->sa.tx_sa->ssci) : - cpu_to_be32((__force u32)ctx->sa.rx_sa->ssci); - - memcpy(&obj_attrs.salt, &key->salt, sizeof(key->salt)); + obj_attrs.ssci = cpu_to_be32((__force u32)sa->ssci); + memcpy(&obj_attrs.salt, &sa->salt, sizeof(sa->salt)); } obj_attrs.replay_window = ctx->secy->replay_window; @@ -499,10 +495,11 @@ mlx5e_macsec_get_macsec_device_context(const struct mlx5e_macsec *macsec, } static void update_macsec_epn(struct mlx5e_macsec_sa *sa, const struct macsec_key *key, - const pn_t *next_pn_halves) + const pn_t *next_pn_halves, ssci_t ssci) { struct mlx5e_macsec_epn_state *epn_state = &sa->epn_state; + sa->ssci = ssci; sa->salt = key->salt; epn_state->epn_enabled = 1; epn_state->epn_msb = next_pn_halves->upper; @@ -550,7 +547,8 @@ static int mlx5e_macsec_add_txsa(struct macsec_context *ctx) tx_sa->assoc_num = assoc_num; if (secy->xpn) - update_macsec_epn(tx_sa, &ctx_tx_sa->key, &ctx_tx_sa->next_pn_halves); + update_macsec_epn(tx_sa, &ctx_tx_sa->key, &ctx_tx_sa->next_pn_halves, + ctx_tx_sa->ssci); err = mlx5_create_encryption_key(mdev, ctx->sa.key, secy->key_len, MLX5_ACCEL_OBJ_MACSEC_KEY, @@ -945,7 +943,8 @@ static int mlx5e_macsec_add_rxsa(struct macsec_context *ctx) rx_sa->fs_id = rx_sc->sc_xarray_element->fs_id; if (ctx->secy->xpn) - update_macsec_epn(rx_sa, &ctx_rx_sa->key, &ctx_rx_sa->next_pn_halves); + update_macsec_epn(rx_sa, &ctx_rx_sa->key, &ctx_rx_sa->next_pn_halves, + ctx_rx_sa->ssci); err = mlx5_create_encryption_key(mdev, ctx->sa.key, ctx->secy->key_len, MLX5_ACCEL_OBJ_MACSEC_KEY, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index cef8df9cd42b..6bb0fdaa5efa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4085,6 +4085,9 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, struct mlx5e_vlan_table *vlan; struct mlx5e_params *params; + if 
(!netif_device_present(netdev)) + return features; + vlan = mlx5e_fs_get_vlan(priv->fs); mutex_lock(&priv->state_lock); params = &priv->channels.params; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index eecaf46c55de..8d29310c7e48 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -191,7 +191,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vport_rep) if (err) { netdev_warn(priv->netdev, "vport %d error %d reading stats\n", rep->vport, err); - return; + goto out; } #define MLX5_GET_CTR(p, x) \ @@ -241,6 +241,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vport_rep) rep_stats->tx_vport_rdma_multicast_bytes = MLX5_GET_CTR(out, received_ib_multicast.octets); +out: kvfree(out); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index c8820ab22169..3df455f6b168 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -2419,7 +2419,7 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, priv = mlx5i_epriv(netdev); tstamp = &priv->tstamp; - stats = rq->stats; + stats = &priv->channel_stats[rq->ix]->rq; flags_rqpn = be32_to_cpu(cqe->flags_rqpn); g = (flags_rqpn >> 28) & 3; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 99a7edb88661..0c04a5e7c274 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1434,7 +1434,6 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); - mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts); if (err) return err; } @@ -1492,8 +1491,10 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, } mutex_unlock(&tc->t_lock); - if 
(attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts); mlx5e_detach_mod_hdr(priv, flow); + } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) mlx5_fc_destroy(priv->mdev, attr->counter); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index e455b215c708..c981fa77f439 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -143,7 +143,7 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, if (mlx5_esw_indir_table_decap_vport(attr)) vport = mlx5_esw_indir_table_decap_vport(attr); - if (attr && !attr->chain && esw_attr->int_port) + if (!attr->chain && esw_attr && esw_attr->int_port) metadata = mlx5e_tc_int_port_get_metadata_for_match(esw_attr->int_port); else @@ -4143,8 +4143,6 @@ int mlx5_devlink_port_fn_migratable_set(struct devlink_port *port, bool enable, } hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); - memcpy(hca_caps, MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability), - MLX5_UN_SZ_BYTES(hca_cap_union)); MLX5_SET(cmd_hca_cap_2, hca_caps, migratable, 1); err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport->vport, @@ -4236,8 +4234,6 @@ int mlx5_devlink_port_fn_roce_set(struct devlink_port *port, bool enable, } hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); - memcpy(hca_caps, MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability), - MLX5_UN_SZ_BYTES(hca_cap_union)); MLX5_SET(cmd_hca_cap, hca_caps, roce, enable); err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport_num, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c index c247cca154e9..eff92dc0927c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -90,9 +90,21 @@ static void mlx5i_get_ringparam(struct net_device *dev, static int mlx5i_set_channels(struct net_device *dev, struct ethtool_channels *ch) { - struct mlx5e_priv *priv = mlx5i_epriv(dev); + struct mlx5i_priv *ipriv = netdev_priv(dev); + struct mlx5e_priv *epriv = mlx5i_epriv(dev); + + /* rtnl lock protects from race between this ethtool op and sub + * interface ndo_init/uninit. + */ + ASSERT_RTNL(); + if (ipriv->num_sub_interfaces > 0) { + mlx5_core_warn(epriv->mdev, + "can't change number of channels for interfaces with sub interfaces (%u)\n", + ipriv->num_sub_interfaces); + return -EINVAL; + } - return mlx5e_ethtool_set_channels(priv, ch); + return mlx5e_ethtool_set_channels(epriv, ch); } static void mlx5i_get_channels(struct net_device *dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index dd4b255c416b..c2a4f86bc890 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -160,6 +160,44 @@ void mlx5i_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) stats->tx_dropped = sstats->tx_queue_dropped; } +struct net_device *mlx5i_parent_get(struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv, *parent_ipriv; + struct net_device *parent_dev; + int parent_ifindex; + + ipriv = priv->ppriv; + + parent_ifindex = netdev->netdev_ops->ndo_get_iflink(netdev); + parent_dev = dev_get_by_index(dev_net(netdev), parent_ifindex); + if (!parent_dev) + return NULL; + + parent_ipriv = netdev_priv(parent_dev); + + ASSERT_RTNL(); + parent_ipriv->num_sub_interfaces++; + + ipriv->parent_dev = parent_dev; + + return parent_dev; +} + +void mlx5i_parent_put(struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv, *parent_ipriv; + + ipriv = priv->ppriv; 
+ parent_ipriv = netdev_priv(ipriv->parent_dev); + + ASSERT_RTNL(); + parent_ipriv->num_sub_interfaces--; + + dev_put(ipriv->parent_dev); +} + int mlx5i_init_underlay_qp(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h index 99d46fda9f82..f3f2af972020 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h @@ -54,9 +54,11 @@ struct mlx5i_priv { struct rdma_netdev rn; /* keep this first */ u32 qpn; bool sub_interface; + u32 num_sub_interfaces; u32 qkey; u16 pkey_index; struct mlx5i_pkey_qpn_ht *qpn_htbl; + struct net_device *parent_dev; char *mlx5e_priv[]; }; @@ -117,5 +119,9 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_av *av, u32 dqpn, u32 dqkey, bool xmit_more); void mlx5i_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); +/* Reference management for child to parent interfaces. 
*/ +struct net_device *mlx5i_parent_get(struct net_device *netdev); +void mlx5i_parent_put(struct net_device *netdev); + #endif /* CONFIG_MLX5_CORE_IPOIB */ #endif /* __MLX5E_IPOB_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c index 4d9c9e49645c..03e681297937 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -158,21 +158,28 @@ static int mlx5i_pkey_dev_init(struct net_device *dev) struct mlx5e_priv *priv = mlx5i_epriv(dev); struct mlx5i_priv *ipriv, *parent_ipriv; struct net_device *parent_dev; - int parent_ifindex; ipriv = priv->ppriv; - /* Get QPN to netdevice hash table from parent */ - parent_ifindex = dev->netdev_ops->ndo_get_iflink(dev); - parent_dev = dev_get_by_index(dev_net(dev), parent_ifindex); + /* Link to parent */ + parent_dev = mlx5i_parent_get(dev); if (!parent_dev) { mlx5_core_warn(priv->mdev, "failed to get parent device\n"); return -EINVAL; } + if (dev->num_rx_queues < parent_dev->real_num_rx_queues) { + mlx5_core_warn(priv->mdev, + "failed to create child device with rx queues [%d] less than parent's [%d]\n", + dev->num_rx_queues, + parent_dev->real_num_rx_queues); + mlx5i_parent_put(dev); + return -EINVAL; + } + + /* Get QPN to netdevice hash table from parent */ parent_ipriv = netdev_priv(parent_dev); ipriv->qpn_htbl = parent_ipriv->qpn_htbl; - dev_put(parent_dev); return mlx5i_dev_init(dev); } @@ -184,6 +191,7 @@ static int mlx5i_pkey_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) static void mlx5i_pkey_dev_cleanup(struct net_device *netdev) { + mlx5i_parent_put(netdev); return mlx5i_dev_cleanup(netdev); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 69cfe60c558a..69318b143268 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -681,7 +681,7 @@ static int mlx5_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin, static const struct ptp_clock_info mlx5_ptp_clock_info = { .owner = THIS_MODULE, .name = "mlx5_ptp", - .max_adj = 100000000, + .max_adj = 50000000, .n_alarm = 0, .n_ext_ts = 0, .n_per_out = 0, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c index 74cbe53ee9db..b851141e03de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c @@ -3,7 +3,12 @@ #include "dr_types.h" +#if defined(CONFIG_FRAME_WARN) && (CONFIG_FRAME_WARN < 2048) +/* don't try to optimize STE allocation if the stack is too constaraining */ +#define DR_RULE_MAX_STES_OPTIMIZED 0 +#else #define DR_RULE_MAX_STES_OPTIMIZED 5 +#endif #define DR_RULE_MAX_STE_CHAIN_OPTIMIZED (DR_RULE_MAX_STES_OPTIMIZED + DR_ACTION_MAX_STES) static int dr_rule_append_to_miss_list(struct mlx5dr_domain *dmn, @@ -1218,10 +1223,7 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule, mlx5dr_domain_nic_unlock(nic_dmn); - if (unlikely(!hw_ste_arr_is_opt)) - kfree(hw_ste_arr); - - return 0; + goto out; free_rule: dr_rule_clean_rule_members(rule, nic_rule); @@ -1238,6 +1240,7 @@ remove_from_nic_tbl: free_hw_ste: mlx5dr_domain_nic_unlock(nic_dmn); +out: if (unlikely(!hw_ste_arr_is_opt)) kfree(hw_ste_arr); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index c22c3ac4e2a1..09e32778b012 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2951,7 +2951,7 @@ struct mlxsw_sp_nexthop_group_info { gateway:1, /* routes using the group use a gateway */ is_resilient:1; struct list_head list; /* member in nh_res_grp_list */ - struct mlxsw_sp_nexthop nexthops[0]; + struct mlxsw_sp_nexthop 
nexthops[]; #define nh_rif nexthops[0].rif }; diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c index f9ebfaafbebc..a8348437dd87 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c @@ -1073,6 +1073,9 @@ void lan966x_ptp_deinit(struct lan966x *lan966x) struct lan966x_port *port; int i; + if (!lan966x->ptp) + return; + for (i = 0; i < lan966x->num_phys_ports; i++) { port = lan966x->ports[i]; if (!port) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c index d8dc9fbb81e1..a54c0426a35f 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c @@ -95,10 +95,7 @@ lan966x_vcap_is2_get_port_keysets(struct net_device *dev, int lookup, bool found = false; u32 val; - /* Check if the port keyset selection is enabled */ val = lan_rd(lan966x, ANA_VCAP_S2_CFG(port->chip_port)); - if (!ANA_VCAP_S2_CFG_ENA_GET(val)) - return -ENOENT; /* Collect all keysets for the port in a list */ if (l3_proto == ETH_P_ALL) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index c25612b539b9..c7ddcb829535 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -1997,10 +1997,7 @@ static enum mac_version rtl8169_get_mac_version(u16 xid, bool gmii) /* 8168F family. */ { 0x7c8, 0x488, RTL_GIGA_MAC_VER_38 }, - /* It seems this chip version never made it to - * the wild. Let's disable detection. - * { 0x7cf, 0x481, RTL_GIGA_MAC_VER_36 }, - */ + { 0x7cf, 0x481, RTL_GIGA_MAC_VER_36 }, { 0x7cf, 0x480, RTL_GIGA_MAC_VER_35 }, /* 8168E family. 
*/ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index d42e1afb6521..2f7d8e4561d9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -90,7 +90,6 @@ struct mediatek_dwmac_plat_data { struct mediatek_dwmac_variant { int (*dwmac_set_phy_interface)(struct mediatek_dwmac_plat_data *plat); int (*dwmac_set_delay)(struct mediatek_dwmac_plat_data *plat); - void (*dwmac_fix_mac_speed)(void *priv, unsigned int speed); /* clock ids to be requested */ const char * const *clk_list; @@ -443,32 +442,9 @@ static int mt8195_set_delay(struct mediatek_dwmac_plat_data *plat) return 0; } -static void mt8195_fix_mac_speed(void *priv, unsigned int speed) -{ - struct mediatek_dwmac_plat_data *priv_plat = priv; - - if ((phy_interface_mode_is_rgmii(priv_plat->phy_mode))) { - /* prefer 2ns fixed delay which is controlled by TXC_PHASE_CTRL, - * when link speed is 1Gbps with RGMII interface, - * Fall back to delay macro circuit for 10/100Mbps link speed. 
- */ - if (speed == SPEED_1000) - regmap_update_bits(priv_plat->peri_regmap, - MT8195_PERI_ETH_CTRL0, - MT8195_RGMII_TXC_PHASE_CTRL | - MT8195_DLY_GTXC_ENABLE | - MT8195_DLY_GTXC_INV | - MT8195_DLY_GTXC_STAGES, - MT8195_RGMII_TXC_PHASE_CTRL); - else - mt8195_set_delay(priv_plat); - } -} - static const struct mediatek_dwmac_variant mt8195_gmac_variant = { .dwmac_set_phy_interface = mt8195_set_interface, .dwmac_set_delay = mt8195_set_delay, - .dwmac_fix_mac_speed = mt8195_fix_mac_speed, .clk_list = mt8195_dwmac_clk_l, .num_clks = ARRAY_SIZE(mt8195_dwmac_clk_l), .dma_bit_mask = 35, @@ -619,8 +595,6 @@ static int mediatek_dwmac_common_data(struct platform_device *pdev, plat->bsp_priv = priv_plat; plat->init = mediatek_dwmac_init; plat->clks_config = mediatek_dwmac_clks_config; - if (priv_plat->variant->dwmac_fix_mac_speed) - plat->fix_mac_speed = priv_plat->variant->dwmac_fix_mac_speed; plat->safety_feat_cfg = devm_kzalloc(&pdev->dev, sizeof(*plat->safety_feat_cfg), diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index fc06ddeac0d5..b4388ca8d211 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -210,7 +210,10 @@ static int stmmac_enable(struct ptp_clock_info *ptp, } writel(acr_value, ptpaddr + PTP_ACR); mutex_unlock(&priv->aux_ts_lock); - ret = 0; + /* wait for auxts fifo clear to finish */ + ret = readl_poll_timeout(ptpaddr + PTP_ACR, acr_value, + !(acr_value & PTP_ACR_ATSFC), + 10, 10000); break; default: diff --git a/drivers/net/ipa/data/ipa_data-v4.7.c b/drivers/net/ipa/data/ipa_data-v4.7.c index 7552c400961e..b83390c48615 100644 --- a/drivers/net/ipa/data/ipa_data-v4.7.c +++ b/drivers/net/ipa/data/ipa_data-v4.7.c @@ -357,7 +357,7 @@ static const struct ipa_mem ipa_mem_local_data[] = { static const struct ipa_mem_data ipa_mem_data = { .local_count = ARRAY_SIZE(ipa_mem_local_data), .local = ipa_mem_local_data, - .imem_addr = 
0x146a9000, + .imem_addr = 0x146a8000, .imem_size = 0x00002000, .smem_id = 497, .smem_size = 0x00009000, diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 9568fe5612ca..80849d115e5d 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -894,6 +894,12 @@ static const struct usb_device_id products[] = { USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { + /* Cinterion PLS62-W modem by GEMALTO/THALES */ + USB_DEVICE_AND_INTERFACE_INFO(0x1e2d, 0x005b, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, + USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&wwan_info, +}, { /* Cinterion PLS83/PLS63 modem by GEMALTO/THALES */ USB_DEVICE_AND_INTERFACE_INFO(0x1e2d, 0x0069, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 66e70b5f8417..a26d3127a4fe 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -9831,6 +9831,7 @@ static const struct usb_device_id rtl8152_table[] = { { USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07ab) }, { USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07c6) }, { USB_DEVICE(VENDOR_ID_MICROSOFT, 0x0927) }, + { USB_DEVICE(VENDOR_ID_MICROSOFT, 0x0c5e) }, { USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101) }, { USB_DEVICE(VENDOR_ID_LENOVO, 0x304f) }, { USB_DEVICE(VENDOR_ID_LENOVO, 0x3054) }, diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c index a83699de01ec..fdd0c9abc1a1 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c @@ -79,7 +79,8 @@ void brcmf_of_probe(struct device *dev, enum brcmf_bus_type bus_type, /* Apple ARM64 platforms have their own idea of board type, passed in * via the device tree. 
They also have an antenna SKU parameter */ - if (!of_property_read_string(np, "brcm,board-type", &prop)) + err = of_property_read_string(np, "brcm,board-type", &prop); + if (!err) settings->board_type = prop; if (!of_property_read_string(np, "apple,antenna-sku", &prop)) @@ -87,7 +88,7 @@ void brcmf_of_probe(struct device *dev, enum brcmf_bus_type bus_type, /* Set board-type to the first string of the machine compatible prop */ root = of_find_node_by_path("/"); - if (root && !settings->board_type) { + if (root && err) { char *board_type; const char *tmp; diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index c1ba4294f364..001636901dda 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -977,7 +977,7 @@ static int read_xenbus_vif_flags(struct backend_info *be) return 0; } -static int netback_remove(struct xenbus_device *dev) +static void netback_remove(struct xenbus_device *dev) { struct backend_info *be = dev_get_drvdata(&dev->dev); @@ -992,7 +992,6 @@ static int netback_remove(struct xenbus_device *dev) kfree(be->hotplug_script); kfree(be); dev_set_drvdata(&dev->dev, NULL); - return 0; } /* diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 14aec417fa06..12b074286df9 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -2646,7 +2646,7 @@ static void xennet_bus_close(struct xenbus_device *dev) } while (!ret); } -static int xennet_remove(struct xenbus_device *dev) +static void xennet_remove(struct xenbus_device *dev) { struct netfront_info *info = dev_get_drvdata(&dev->dev); @@ -2662,8 +2662,6 @@ static int xennet_remove(struct xenbus_device *dev) rtnl_unlock(); } xennet_free_netdev(info->netdev); - - return 0; } static const struct xenbus_device_id netfront_ids[] = { diff --git a/drivers/nfc/pn533/usb.c b/drivers/nfc/pn533/usb.c index 6f71ac72012e..ed9c5e2cf3ad 100644 --- a/drivers/nfc/pn533/usb.c +++ b/drivers/nfc/pn533/usb.c @@ -153,10 +153,17 @@ 
static int pn533_usb_send_ack(struct pn533 *dev, gfp_t flags) return usb_submit_urb(phy->ack_urb, flags); } +struct pn533_out_arg { + struct pn533_usb_phy *phy; + struct completion done; +}; + static int pn533_usb_send_frame(struct pn533 *dev, struct sk_buff *out) { struct pn533_usb_phy *phy = dev->phy; + struct pn533_out_arg arg; + void *cntx; int rc; if (phy->priv == NULL) @@ -168,10 +175,17 @@ static int pn533_usb_send_frame(struct pn533 *dev, print_hex_dump_debug("PN533 TX: ", DUMP_PREFIX_NONE, 16, 1, out->data, out->len, false); + init_completion(&arg.done); + cntx = phy->out_urb->context; + phy->out_urb->context = &arg; + rc = usb_submit_urb(phy->out_urb, GFP_KERNEL); if (rc) return rc; + wait_for_completion(&arg.done); + phy->out_urb->context = cntx; + if (dev->protocol_type == PN533_PROTO_REQ_RESP) { /* request for response for sent packet directly */ rc = pn533_submit_urb_for_response(phy, GFP_KERNEL); @@ -408,7 +422,31 @@ static int pn533_acr122_poweron_rdr(struct pn533_usb_phy *phy) return arg.rc; } -static void pn533_send_complete(struct urb *urb) +static void pn533_out_complete(struct urb *urb) +{ + struct pn533_out_arg *arg = urb->context; + struct pn533_usb_phy *phy = arg->phy; + + switch (urb->status) { + case 0: + break; /* success */ + case -ECONNRESET: + case -ENOENT: + dev_dbg(&phy->udev->dev, + "The urb has been stopped (status %d)\n", + urb->status); + break; + case -ESHUTDOWN: + default: + nfc_err(&phy->udev->dev, + "Urb failure (status %d)\n", + urb->status); + } + + complete(&arg->done); +} + +static void pn533_ack_complete(struct urb *urb) { struct pn533_usb_phy *phy = urb->context; @@ -496,10 +534,10 @@ static int pn533_usb_probe(struct usb_interface *interface, usb_fill_bulk_urb(phy->out_urb, phy->udev, usb_sndbulkpipe(phy->udev, out_endpoint), - NULL, 0, pn533_send_complete, phy); + NULL, 0, pn533_out_complete, phy); usb_fill_bulk_urb(phy->ack_urb, phy->udev, usb_sndbulkpipe(phy->udev, out_endpoint), - NULL, 0, pn533_send_complete, 
phy); + NULL, 0, pn533_ack_complete, phy); switch (id->driver_info) { case PN533_DEVICE_STD: diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index c03093b6813c..fc39d01e7b63 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -376,6 +376,8 @@ static void nvme_ns_head_submit_bio(struct bio *bio) * pool from the original queue to allocate the bvecs from. */ bio = bio_split_to_limits(bio); + if (!bio) + return; srcu_idx = srcu_read_lock(&head->srcu); ns = nvme_find_path(head); diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index b2272bccf85c..f08b25195ae7 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -1099,7 +1099,7 @@ u64 __init dt_mem_next_cell(int s, const __be32 **cellp) */ int __init early_init_dt_scan_memory(void) { - int node; + int node, found_memory = 0; const void *fdt = initial_boot_params; fdt_for_each_subnode(node, fdt, 0) { @@ -1139,6 +1139,8 @@ int __init early_init_dt_scan_memory(void) early_init_dt_add_memory_arch(base, size); + found_memory = 1; + if (!hotpluggable) continue; @@ -1147,7 +1149,7 @@ int __init early_init_dt_scan_memory(void) base, base + size); } } - return 0; + return found_memory; } int __init early_init_dt_scan_chosen(char *cmdline) @@ -1161,18 +1163,14 @@ int __init early_init_dt_scan_chosen(char *cmdline) if (node < 0) node = fdt_path_offset(fdt, "/chosen@0"); if (node < 0) - return -ENOENT; + /* Handle the cmdline config options even if no /chosen node */ + goto handle_cmdline; chosen_node_offset = node; early_init_dt_check_for_initrd(node); early_init_dt_check_for_elfcorehdr(node); - /* Retrieve command line */ - p = of_get_flat_dt_prop(node, "bootargs", &l); - if (p != NULL && l > 0) - strscpy(cmdline, p, min(l, COMMAND_LINE_SIZE)); - rng_seed = of_get_flat_dt_prop(node, "rng-seed", &l); if (rng_seed && l > 0) { add_bootloader_randomness(rng_seed, l); @@ -1185,6 +1183,32 @@ int __init early_init_dt_scan_chosen(char *cmdline) 
fdt_totalsize(initial_boot_params)); } + /* Retrieve command line */ + p = of_get_flat_dt_prop(node, "bootargs", &l); + if (p != NULL && l > 0) + strscpy(cmdline, p, min(l, COMMAND_LINE_SIZE)); + +handle_cmdline: + /* + * CONFIG_CMDLINE is meant to be a default in case nothing else + * managed to set the command line, unless CONFIG_CMDLINE_FORCE + * is set in which case we override whatever was found earlier. + */ +#ifdef CONFIG_CMDLINE +#if defined(CONFIG_CMDLINE_EXTEND) + strlcat(cmdline, " ", COMMAND_LINE_SIZE); + strlcat(cmdline, CONFIG_CMDLINE, COMMAND_LINE_SIZE); +#elif defined(CONFIG_CMDLINE_FORCE) + strscpy(cmdline, CONFIG_CMDLINE, COMMAND_LINE_SIZE); +#else + /* No arguments from boot loader, use kernel's cmdl*/ + if (!((char *)cmdline)[0]) + strscpy(cmdline, CONFIG_CMDLINE, COMMAND_LINE_SIZE); +#endif +#endif /* CONFIG_CMDLINE */ + + pr_debug("Command line is: %s\n", (char *)cmdline); + return 0; } @@ -1277,26 +1301,6 @@ void __init early_init_dt_scan_nodes(void) if (rc) pr_warn("No chosen node found, continuing without\n"); - /* - * CONFIG_CMDLINE is meant to be a default in case nothing else - * managed to set the command line, unless CONFIG_CMDLINE_FORCE - * is set in which case we override whatever was found earlier. 
- */ -#ifdef CONFIG_CMDLINE -#if defined(CONFIG_CMDLINE_EXTEND) - strlcat(boot_command_line, " ", COMMAND_LINE_SIZE); - strlcat(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); -#elif defined(CONFIG_CMDLINE_FORCE) - strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); -#else - /* No arguments from boot loader, use kernel's cmdl */ - if (!boot_command_line[0]) - strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); -#endif -#endif /* CONFIG_CMDLINE */ - - pr_debug("Command line is: %s\n", boot_command_line); - /* Setup memory, calling early_init_dt_add_memory_arch */ early_init_dt_scan_memory(); diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index 7378e2f3e525..fcd029ca2eb1 100644 --- a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c @@ -1055,14 +1055,12 @@ out: return err; } -static int pcifront_xenbus_remove(struct xenbus_device *xdev) +static void pcifront_xenbus_remove(struct xenbus_device *xdev) { struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev); if (pdev) free_pdev(pdev); - - return 0; } static const struct xenbus_device_id xenpci_ids[] = { diff --git a/drivers/regulator/da9211-regulator.c b/drivers/regulator/da9211-regulator.c index e01b32d1fa17..00828f5baa97 100644 --- a/drivers/regulator/da9211-regulator.c +++ b/drivers/regulator/da9211-regulator.c @@ -498,6 +498,12 @@ static int da9211_i2c_probe(struct i2c_client *i2c) chip->chip_irq = i2c->irq; + ret = da9211_regulator_init(chip); + if (ret < 0) { + dev_err(chip->dev, "Failed to initialize regulator: %d\n", ret); + return ret; + } + if (chip->chip_irq != 0) { ret = devm_request_threaded_irq(chip->dev, chip->chip_irq, NULL, da9211_irq_handler, @@ -512,11 +518,6 @@ static int da9211_i2c_probe(struct i2c_client *i2c) dev_warn(chip->dev, "No IRQ configured\n"); } - ret = da9211_regulator_init(chip); - - if (ret < 0) - dev_err(chip->dev, "Failed to initialize regulator: %d\n", ret); - return ret; } diff --git 
a/drivers/regulator/qcom-rpmh-regulator.c b/drivers/regulator/qcom-rpmh-regulator.c index 43b5b9377714..ae6021390143 100644 --- a/drivers/regulator/qcom-rpmh-regulator.c +++ b/drivers/regulator/qcom-rpmh-regulator.c @@ -1016,7 +1016,7 @@ static const struct rpmh_vreg_init_data pm8550_vreg_data[] = { RPMH_VREG("ldo8", "ldo%s8", &pmic5_pldo_lv, "vdd-l8-l9"), RPMH_VREG("ldo9", "ldo%s9", &pmic5_pldo, "vdd-l8-l9"), RPMH_VREG("ldo10", "ldo%s10", &pmic5_nldo, "vdd-l1-l4-l10"), - RPMH_VREG("ldo11", "ldo%s11", &pmic5_pldo, "vdd-l11"), + RPMH_VREG("ldo11", "ldo%s11", &pmic5_nldo, "vdd-l11"), RPMH_VREG("ldo12", "ldo%s12", &pmic5_pldo, "vdd-l12"), RPMH_VREG("ldo13", "ldo%s13", &pmic5_pldo, "vdd-l2-l13-l14"), RPMH_VREG("ldo14", "ldo%s14", &pmic5_pldo, "vdd-l2-l13-l14"), diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index b392b9f5482e..c0f85ffb2b62 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -865,6 +865,8 @@ dcssblk_submit_bio(struct bio *bio) unsigned long bytes_done; bio = bio_split_to_limits(bio); + if (!bio) + return; bytes_done = 0; dev_info = bio->bi_bdev->bd_disk->private_data; diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 41ba22f6c7f0..e9c2d306ed87 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -162,7 +162,7 @@ static void hisi_sas_slot_index_clear(struct hisi_hba *hisi_hba, int slot_idx) static void hisi_sas_slot_index_free(struct hisi_hba *hisi_hba, int slot_idx) { if (hisi_hba->hw->slot_index_alloc || - slot_idx >= HISI_SAS_UNRESERVED_IPTT) { + slot_idx < HISI_SAS_RESERVED_IPTT) { spin_lock(&hisi_hba->lock); hisi_sas_slot_index_clear(hisi_hba, slot_idx); spin_unlock(&hisi_hba->lock); diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index 1ccce706167a..5e80225b5308 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -889,7 +889,9 @@ void 
sas_ata_device_link_abort(struct domain_device *device, bool force_reset) { struct ata_port *ap = device->sata_dev.ap; struct ata_link *link = &ap->link; + unsigned long flags; + spin_lock_irqsave(ap->lock, flags); device->sata_dev.fis[2] = ATA_ERR | ATA_DRDY; /* tf status */ device->sata_dev.fis[3] = ATA_ABORTED; /* tf error */ @@ -897,6 +899,7 @@ void sas_ata_device_link_abort(struct domain_device *device, bool force_reset) if (force_reset) link->eh_info.action |= ATA_EH_RESET; ata_link_abort(link); + spin_unlock_irqrestore(ap->lock, flags); } EXPORT_SYMBOL_GPL(sas_ata_device_link_abort); diff --git a/drivers/scsi/mpi3mr/Makefile b/drivers/scsi/mpi3mr/Makefile index ef86ca46646b..3bf8cf34e1c3 100644 --- a/drivers/scsi/mpi3mr/Makefile +++ b/drivers/scsi/mpi3mr/Makefile @@ -1,5 +1,5 @@ # mpi3mr makefile -obj-m += mpi3mr.o +obj-$(CONFIG_SCSI_MPI3MR) += mpi3mr.o mpi3mr-y += mpi3mr_os.o \ mpi3mr_fw.o \ mpi3mr_app.o \ diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index 0c4aabaefdcc..286a44506578 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_fw.c +++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c @@ -3633,8 +3633,7 @@ int mpi3mr_setup_resources(struct mpi3mr_ioc *mrioc) int i, retval = 0, capb = 0; u16 message_control; u64 dma_mask = mrioc->dma_mask ? mrioc->dma_mask : - (((dma_get_required_mask(&pdev->dev) > DMA_BIT_MASK(32)) && - (sizeof(dma_addr_t) > 4)) ? DMA_BIT_MASK(64) : DMA_BIT_MASK(32)); + ((sizeof(dma_addr_t) > 4) ? 
DMA_BIT_MASK(64) : DMA_BIT_MASK(32)); if (pci_enable_device_mem(pdev)) { ioc_err(mrioc, "pci_enable_device_mem: failed\n"); diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 4e981ccaac41..69061545d9d2 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -2992,8 +2992,7 @@ _base_config_dma_addressing(struct MPT3SAS_ADAPTER *ioc, struct pci_dev *pdev) struct sysinfo s; u64 coherent_dma_mask, dma_mask; - if (ioc->is_mcpu_endpoint || sizeof(dma_addr_t) == 4 || - dma_get_required_mask(&pdev->dev) <= DMA_BIT_MASK(32)) { + if (ioc->is_mcpu_endpoint || sizeof(dma_addr_t) == 4) { ioc->dma_mask = 32; coherent_dma_mask = dma_mask = DMA_BIT_MASK(32); /* Set 63 bit DMA mask for all SAS3 and SAS35 controllers */ diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index cc6953809a24..8553277effb3 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -1511,8 +1511,6 @@ static int inquiry_vpd_b0(unsigned char *arr) put_unaligned_be64(sdebug_write_same_length, &arr[32]); return 0x3c; /* Mandatory page length for Logical Block Provisioning */ - - return sizeof(vpdb0_data); } /* Block device characteristics VPD page (SBC-3) */ diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index a7960ad2d386..2aa2c2aee6e7 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -231,6 +231,11 @@ scsi_abort_command(struct scsi_cmnd *scmd) struct Scsi_Host *shost = sdev->host; unsigned long flags; + if (!shost->hostt->eh_abort_handler) { + /* No abort handler, fail command directly */ + return FAILED; + } + if (scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED) { /* * Retry after abort failed, escalate to next level. 
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 13cfd3e317cc..b9b97300e3b3 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -1677,6 +1677,13 @@ static const char *iscsi_session_state_name(int state) return name; } +static char *iscsi_session_target_state_name[] = { + [ISCSI_SESSION_TARGET_UNBOUND] = "UNBOUND", + [ISCSI_SESSION_TARGET_ALLOCATED] = "ALLOCATED", + [ISCSI_SESSION_TARGET_SCANNED] = "SCANNED", + [ISCSI_SESSION_TARGET_UNBINDING] = "UNBINDING", +}; + int iscsi_session_chkready(struct iscsi_cls_session *session) { int err; @@ -1786,9 +1793,13 @@ static int iscsi_user_scan_session(struct device *dev, void *data) if ((scan_data->channel == SCAN_WILD_CARD || scan_data->channel == 0) && (scan_data->id == SCAN_WILD_CARD || - scan_data->id == id)) + scan_data->id == id)) { scsi_scan_target(&session->dev, 0, id, scan_data->lun, scan_data->rescan); + spin_lock_irqsave(&session->lock, flags); + session->target_state = ISCSI_SESSION_TARGET_SCANNED; + spin_unlock_irqrestore(&session->lock, flags); + } } user_scan_exit: @@ -1961,31 +1972,41 @@ static void __iscsi_unbind_session(struct work_struct *work) struct iscsi_cls_host *ihost = shost->shost_data; unsigned long flags; unsigned int target_id; + bool remove_target = true; ISCSI_DBG_TRANS_SESSION(session, "Unbinding session\n"); /* Prevent new scans and make sure scanning is not in progress */ mutex_lock(&ihost->mutex); spin_lock_irqsave(&session->lock, flags); - if (session->target_id == ISCSI_MAX_TARGET) { + if (session->target_state == ISCSI_SESSION_TARGET_ALLOCATED) { + remove_target = false; + } else if (session->target_state != ISCSI_SESSION_TARGET_SCANNED) { spin_unlock_irqrestore(&session->lock, flags); mutex_unlock(&ihost->mutex); - goto unbind_session_exit; + ISCSI_DBG_TRANS_SESSION(session, + "Skipping target unbinding: Session is unbound/unbinding.\n"); + return; } + session->target_state = 
ISCSI_SESSION_TARGET_UNBINDING; target_id = session->target_id; session->target_id = ISCSI_MAX_TARGET; spin_unlock_irqrestore(&session->lock, flags); mutex_unlock(&ihost->mutex); - scsi_remove_target(&session->dev); + if (remove_target) + scsi_remove_target(&session->dev); if (session->ida_used) ida_free(&iscsi_sess_ida, target_id); -unbind_session_exit: iscsi_session_event(session, ISCSI_KEVENT_UNBIND_SESSION); ISCSI_DBG_TRANS_SESSION(session, "Completed target removal\n"); + + spin_lock_irqsave(&session->lock, flags); + session->target_state = ISCSI_SESSION_TARGET_UNBOUND; + spin_unlock_irqrestore(&session->lock, flags); } static void __iscsi_destroy_session(struct work_struct *work) @@ -2062,6 +2083,9 @@ int iscsi_add_session(struct iscsi_cls_session *session, unsigned int target_id) session->ida_used = true; } else session->target_id = target_id; + spin_lock_irqsave(&session->lock, flags); + session->target_state = ISCSI_SESSION_TARGET_ALLOCATED; + spin_unlock_irqrestore(&session->lock, flags); dev_set_name(&session->dev, "session%u", session->sid); err = device_add(&session->dev); @@ -4370,6 +4394,19 @@ iscsi_session_attr(discovery_parent_idx, ISCSI_PARAM_DISCOVERY_PARENT_IDX, 0); iscsi_session_attr(discovery_parent_type, ISCSI_PARAM_DISCOVERY_PARENT_TYPE, 0); static ssize_t +show_priv_session_target_state(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent); + + return sysfs_emit(buf, "%s\n", + iscsi_session_target_state_name[session->target_state]); +} + +static ISCSI_CLASS_ATTR(priv_sess, target_state, S_IRUGO, + show_priv_session_target_state, NULL); + +static ssize_t show_priv_session_state(struct device *dev, struct device_attribute *attr, char *buf) { @@ -4471,6 +4508,7 @@ static struct attribute *iscsi_session_attrs[] = { &dev_attr_sess_boot_target.attr, &dev_attr_priv_sess_recovery_tmo.attr, &dev_attr_priv_sess_state.attr, + &dev_attr_priv_sess_target_state.attr, 
&dev_attr_priv_sess_creator.attr, &dev_attr_sess_chap_out_idx.attr, &dev_attr_sess_chap_in_idx.attr, @@ -4584,6 +4622,8 @@ static umode_t iscsi_session_attr_is_visible(struct kobject *kobj, return S_IRUGO | S_IWUSR; else if (attr == &dev_attr_priv_sess_state.attr) return S_IRUGO; + else if (attr == &dev_attr_priv_sess_target_state.attr) + return S_IRUGO; else if (attr == &dev_attr_priv_sess_creator.attr) return S_IRUGO; else if (attr == &dev_attr_priv_sess_target_id.attr) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index d7a84c0bfaeb..22705eb781b0 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1823,6 +1823,9 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd) ret = storvsc_do_io(dev, cmd_request, get_cpu()); put_cpu(); + if (ret) + scsi_dma_unmap(scmnd); + if (ret == -EAGAIN) { /* no more space */ ret = SCSI_MLQUEUE_DEVICE_BUSY; diff --git a/drivers/scsi/xen-scsifront.c b/drivers/scsi/xen-scsifront.c index 66b316d173b0..71a3bb83984c 100644 --- a/drivers/scsi/xen-scsifront.c +++ b/drivers/scsi/xen-scsifront.c @@ -995,7 +995,7 @@ static int scsifront_suspend(struct xenbus_device *dev) return err; } -static int scsifront_remove(struct xenbus_device *dev) +static void scsifront_remove(struct xenbus_device *dev) { struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev); @@ -1011,8 +1011,6 @@ static int scsifront_remove(struct xenbus_device *dev) scsifront_free_ring(info); scsi_host_put(info->host); - - return 0; } static void scsifront_disconnect(struct vscsifrnt_info *info) diff --git a/drivers/spi/spi-cadence-xspi.c b/drivers/spi/spi-cadence-xspi.c index 520b4cc69cdc..91db3c973167 100644 --- a/drivers/spi/spi-cadence-xspi.c +++ b/drivers/spi/spi-cadence-xspi.c @@ -177,7 +177,10 @@ #define CDNS_XSPI_CMD_FLD_DSEQ_CMD_3(op) ( \ FIELD_PREP(CDNS_XSPI_CMD_DSEQ_R3_DCNT_H, \ ((op)->data.nbytes >> 16) & 0xffff) | \ - FIELD_PREP(CDNS_XSPI_CMD_DSEQ_R3_NUM_OF_DUMMY, (op)->dummy.nbytes * 8)) + 
FIELD_PREP(CDNS_XSPI_CMD_DSEQ_R3_NUM_OF_DUMMY, \ + (op)->dummy.buswidth != 0 ? \ + (((op)->dummy.nbytes * 8) / (op)->dummy.buswidth) : \ + 0)) #define CDNS_XSPI_CMD_FLD_DSEQ_CMD_4(op, chipsel) ( \ FIELD_PREP(CDNS_XSPI_CMD_DSEQ_R4_BANK, chipsel) | \ diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index 6de8360e5c2a..9eab6c20dbc5 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -1253,6 +1253,11 @@ static int mtk_spi_probe(struct platform_device *pdev) dev_notice(dev, "SPI dma_set_mask(%d) failed, ret:%d\n", addr_bits, ret); + ret = devm_request_irq(dev, irq, mtk_spi_interrupt, + IRQF_TRIGGER_NONE, dev_name(dev), master); + if (ret) + return dev_err_probe(dev, ret, "failed to register irq\n"); + pm_runtime_enable(dev); ret = devm_spi_register_master(dev, master); @@ -1261,13 +1266,6 @@ static int mtk_spi_probe(struct platform_device *pdev) return dev_err_probe(dev, ret, "failed to register master\n"); } - ret = devm_request_irq(dev, irq, mtk_spi_interrupt, - IRQF_TRIGGER_NONE, dev_name(dev), master); - if (ret) { - pm_runtime_disable(dev); - return dev_err_probe(dev, ret, "failed to register irq\n"); - } - return 0; } diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 3cc7bb4d03de..15f174f4e056 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -2310,7 +2310,7 @@ static int of_spi_parse_dt(struct spi_controller *ctlr, struct spi_device *spi, if (!of_property_read_u32(nc, "spi-max-frequency", &value)) spi->max_speed_hz = value; - if (!of_property_read_u16(nc, "spi-cs-setup-ns", &cs_setup)) { + if (!of_property_read_u16(nc, "spi-cs-setup-delay-ns", &cs_setup)) { spi->cs_setup.value = cs_setup; spi->cs_setup.unit = SPI_DELAY_UNIT_NSECS; } diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index 6313e7d0cdf8..1935ca613447 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -68,7 +68,7 @@ static_assert(N_SPI_MINORS > 0 && N_SPI_MINORS <= 256); struct spidev_data { dev_t devt; - spinlock_t spi_lock; + 
struct mutex spi_lock; struct spi_device *spi; struct list_head device_entry; @@ -95,9 +95,8 @@ spidev_sync(struct spidev_data *spidev, struct spi_message *message) int status; struct spi_device *spi; - spin_lock_irq(&spidev->spi_lock); + mutex_lock(&spidev->spi_lock); spi = spidev->spi; - spin_unlock_irq(&spidev->spi_lock); if (spi == NULL) status = -ESHUTDOWN; @@ -107,6 +106,7 @@ spidev_sync(struct spidev_data *spidev, struct spi_message *message) if (status == 0) status = message->actual_length; + mutex_unlock(&spidev->spi_lock); return status; } @@ -359,12 +359,12 @@ spidev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) * we issue this ioctl. */ spidev = filp->private_data; - spin_lock_irq(&spidev->spi_lock); + mutex_lock(&spidev->spi_lock); spi = spi_dev_get(spidev->spi); - spin_unlock_irq(&spidev->spi_lock); - - if (spi == NULL) + if (spi == NULL) { + mutex_unlock(&spidev->spi_lock); return -ESHUTDOWN; + } /* use the buffer lock here for triple duty: * - prevent I/O (from us) so calling spi_setup() is safe; @@ -508,6 +508,7 @@ spidev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) mutex_unlock(&spidev->buf_lock); spi_dev_put(spi); + mutex_unlock(&spidev->spi_lock); return retval; } @@ -529,12 +530,12 @@ spidev_compat_ioc_message(struct file *filp, unsigned int cmd, * we issue this ioctl. 
*/ spidev = filp->private_data; - spin_lock_irq(&spidev->spi_lock); + mutex_lock(&spidev->spi_lock); spi = spi_dev_get(spidev->spi); - spin_unlock_irq(&spidev->spi_lock); - - if (spi == NULL) + if (spi == NULL) { + mutex_unlock(&spidev->spi_lock); return -ESHUTDOWN; + } /* SPI_IOC_MESSAGE needs the buffer locked "normally" */ mutex_lock(&spidev->buf_lock); @@ -561,6 +562,7 @@ spidev_compat_ioc_message(struct file *filp, unsigned int cmd, done: mutex_unlock(&spidev->buf_lock); spi_dev_put(spi); + mutex_unlock(&spidev->spi_lock); return retval; } @@ -601,7 +603,6 @@ static int spidev_open(struct inode *inode, struct file *filp) if (!spidev->tx_buffer) { spidev->tx_buffer = kmalloc(bufsiz, GFP_KERNEL); if (!spidev->tx_buffer) { - dev_dbg(&spidev->spi->dev, "open/ENOMEM\n"); status = -ENOMEM; goto err_find_dev; } @@ -610,7 +611,6 @@ static int spidev_open(struct inode *inode, struct file *filp) if (!spidev->rx_buffer) { spidev->rx_buffer = kmalloc(bufsiz, GFP_KERNEL); if (!spidev->rx_buffer) { - dev_dbg(&spidev->spi->dev, "open/ENOMEM\n"); status = -ENOMEM; goto err_alloc_rx_buf; } @@ -640,10 +640,10 @@ static int spidev_release(struct inode *inode, struct file *filp) spidev = filp->private_data; filp->private_data = NULL; - spin_lock_irq(&spidev->spi_lock); + mutex_lock(&spidev->spi_lock); /* ... after we unbound from the underlying device? */ dofree = (spidev->spi == NULL); - spin_unlock_irq(&spidev->spi_lock); + mutex_unlock(&spidev->spi_lock); /* last close? 
*/ spidev->users--; @@ -776,7 +776,7 @@ static int spidev_probe(struct spi_device *spi) /* Initialize the driver data */ spidev->spi = spi; - spin_lock_init(&spidev->spi_lock); + mutex_init(&spidev->spi_lock); mutex_init(&spidev->buf_lock); INIT_LIST_HEAD(&spidev->device_entry); @@ -821,9 +821,9 @@ static void spidev_remove(struct spi_device *spi) /* prevent new opens */ mutex_lock(&device_list_lock); /* make sure ops on existing fds can abort cleanly */ - spin_lock_irq(&spidev->spi_lock); + mutex_lock(&spidev->spi_lock); spidev->spi = NULL; - spin_unlock_irq(&spidev->spi_lock); + mutex_unlock(&spidev->spi_lock); list_del(&spidev->device_entry); device_destroy(spidev_class, spidev->devt); diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c index 8c42e7662033..92ed1213fe37 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c @@ -172,6 +172,7 @@ static const struct attribute_group fivr_attribute_group = { RFIM_SHOW(rfi_restriction_run_busy, 1) RFIM_SHOW(rfi_restriction_err_code, 1) RFIM_SHOW(rfi_restriction_data_rate, 1) +RFIM_SHOW(rfi_restriction_data_rate_base, 1) RFIM_SHOW(ddr_data_rate_point_0, 1) RFIM_SHOW(ddr_data_rate_point_1, 1) RFIM_SHOW(ddr_data_rate_point_2, 1) @@ -181,11 +182,13 @@ RFIM_SHOW(rfi_disable, 1) RFIM_STORE(rfi_restriction_run_busy, 1) RFIM_STORE(rfi_restriction_err_code, 1) RFIM_STORE(rfi_restriction_data_rate, 1) +RFIM_STORE(rfi_restriction_data_rate_base, 1) RFIM_STORE(rfi_disable, 1) static DEVICE_ATTR_RW(rfi_restriction_run_busy); static DEVICE_ATTR_RW(rfi_restriction_err_code); static DEVICE_ATTR_RW(rfi_restriction_data_rate); +static DEVICE_ATTR_RW(rfi_restriction_data_rate_base); static DEVICE_ATTR_RO(ddr_data_rate_point_0); static DEVICE_ATTR_RO(ddr_data_rate_point_1); static DEVICE_ATTR_RO(ddr_data_rate_point_2); @@ -248,6 +251,7 @@ static struct attribute 
*dvfs_attrs[] = { &dev_attr_rfi_restriction_run_busy.attr, &dev_attr_rfi_restriction_err_code.attr, &dev_attr_rfi_restriction_data_rate.attr, + &dev_attr_rfi_restriction_data_rate_base.attr, &dev_attr_ddr_data_rate_point_0.attr, &dev_attr_ddr_data_rate_point_1.attr, &dev_attr_ddr_data_rate_point_2.attr, diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index 7c23112dc923..5bddb2f5e931 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -52,17 +52,22 @@ static DEFINE_SPINLOCK(xencons_lock); static struct xencons_info *vtermno_to_xencons(int vtermno) { - struct xencons_info *entry, *n, *ret = NULL; + struct xencons_info *entry, *ret = NULL; + unsigned long flags; - if (list_empty(&xenconsoles)) - return NULL; + spin_lock_irqsave(&xencons_lock, flags); + if (list_empty(&xenconsoles)) { + spin_unlock_irqrestore(&xencons_lock, flags); + return NULL; + } - list_for_each_entry_safe(entry, n, &xenconsoles, list) { + list_for_each_entry(entry, &xenconsoles, list) { if (entry->vtermno == vtermno) { ret = entry; break; } } + spin_unlock_irqrestore(&xencons_lock, flags); return ret; } @@ -223,7 +228,7 @@ static int xen_hvm_console_init(void) { int r; uint64_t v = 0; - unsigned long gfn; + unsigned long gfn, flags; struct xencons_info *info; if (!xen_hvm_domain()) @@ -258,9 +263,9 @@ static int xen_hvm_console_init(void) goto err; info->vtermno = HVC_COOKIE; - spin_lock(&xencons_lock); + spin_lock_irqsave(&xencons_lock, flags); list_add_tail(&info->list, &xenconsoles); - spin_unlock(&xencons_lock); + spin_unlock_irqrestore(&xencons_lock, flags); return 0; err: @@ -283,6 +288,7 @@ static int xencons_info_pv_init(struct xencons_info *info, int vtermno) static int xen_pv_console_init(void) { struct xencons_info *info; + unsigned long flags; if (!xen_pv_domain()) return -ENODEV; @@ -299,9 +305,9 @@ static int xen_pv_console_init(void) /* already configured */ return 0; } - spin_lock(&xencons_lock); + spin_lock_irqsave(&xencons_lock, flags); 
xencons_info_pv_init(info, HVC_COOKIE); - spin_unlock(&xencons_lock); + spin_unlock_irqrestore(&xencons_lock, flags); return 0; } @@ -309,6 +315,7 @@ static int xen_pv_console_init(void) static int xen_initial_domain_console_init(void) { struct xencons_info *info; + unsigned long flags; if (!xen_initial_domain()) return -ENODEV; @@ -323,9 +330,9 @@ static int xen_initial_domain_console_init(void) info->irq = bind_virq_to_irq(VIRQ_CONSOLE, 0, false); info->vtermno = HVC_COOKIE; - spin_lock(&xencons_lock); + spin_lock_irqsave(&xencons_lock, flags); list_add_tail(&info->list, &xenconsoles); - spin_unlock(&xencons_lock); + spin_unlock_irqrestore(&xencons_lock, flags); return 0; } @@ -380,10 +387,12 @@ static void xencons_free(struct xencons_info *info) static int xen_console_remove(struct xencons_info *info) { + unsigned long flags; + xencons_disconnect_backend(info); - spin_lock(&xencons_lock); + spin_lock_irqsave(&xencons_lock, flags); list_del(&info->list); - spin_unlock(&xencons_lock); + spin_unlock_irqrestore(&xencons_lock, flags); if (info->xbdev != NULL) xencons_free(info); else { @@ -394,9 +403,9 @@ static int xen_console_remove(struct xencons_info *info) return 0; } -static int xencons_remove(struct xenbus_device *dev) +static void xencons_remove(struct xenbus_device *dev) { - return xen_console_remove(dev_get_drvdata(&dev->dev)); + xen_console_remove(dev_get_drvdata(&dev->dev)); } static int xencons_connect_backend(struct xenbus_device *dev, @@ -464,6 +473,7 @@ static int xencons_probe(struct xenbus_device *dev, { int ret, devid; struct xencons_info *info; + unsigned long flags; devid = dev->nodename[strlen(dev->nodename) - 1] - '0'; if (devid == 0) @@ -482,9 +492,9 @@ static int xencons_probe(struct xenbus_device *dev, ret = xencons_connect_backend(dev, info); if (ret < 0) goto error; - spin_lock(&xencons_lock); + spin_lock_irqsave(&xencons_lock, flags); list_add_tail(&info->list, &xenconsoles); - spin_unlock(&xencons_lock); + 
spin_unlock_irqrestore(&xencons_lock, flags); return 0; @@ -584,10 +594,12 @@ static int __init xen_hvc_init(void) info->hvc = hvc_alloc(HVC_COOKIE, info->irq, ops, 256); if (IS_ERR(info->hvc)) { + unsigned long flags; + r = PTR_ERR(info->hvc); - spin_lock(&xencons_lock); + spin_lock_irqsave(&xencons_lock, flags); list_del(&info->list); - spin_unlock(&xencons_lock); + spin_unlock_irqrestore(&xencons_lock, flags); if (info->irq) unbind_from_irqhandler(info->irq, NULL); kfree(info); diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index e18c9f4463ec..bda61be5f035 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -6056,6 +6056,14 @@ void ufshcd_schedule_eh_work(struct ufs_hba *hba) } } +static void ufshcd_force_error_recovery(struct ufs_hba *hba) +{ + spin_lock_irq(hba->host->host_lock); + hba->force_reset = true; + ufshcd_schedule_eh_work(hba); + spin_unlock_irq(hba->host->host_lock); +} + static void ufshcd_clk_scaling_allow(struct ufs_hba *hba, bool allow) { down_write(&hba->clk_scaling_lock); @@ -9083,6 +9091,15 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) if (!hba->dev_info.b_rpm_dev_flush_capable) { ret = ufshcd_set_dev_pwr_mode(hba, req_dev_pwr_mode); + if (ret && pm_op != UFS_SHUTDOWN_PM) { + /* + * If return err in suspend flow, IO will hang. + * Trigger error handler and break suspend for + * error recovery. + */ + ufshcd_force_error_recovery(hba); + ret = -EBUSY; + } if (ret) goto enable_scaling; } @@ -9094,6 +9111,15 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) */ check_for_bkops = !ufshcd_is_ufs_dev_deepsleep(hba); ret = ufshcd_link_state_transition(hba, req_link_state, check_for_bkops); + if (ret && pm_op != UFS_SHUTDOWN_PM) { + /* + * If return err in suspend flow, IO will hang. + * Trigger error handler and break suspend for + * error recovery. 
+ */ + ufshcd_force_error_recovery(hba); + ret = -EBUSY; + } if (ret) goto set_dev_active; diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c index 60e8174686a1..d7c8461976ce 100644 --- a/drivers/usb/common/ulpi.c +++ b/drivers/usb/common/ulpi.c @@ -207,7 +207,7 @@ static int ulpi_read_id(struct ulpi *ulpi) /* Test the interface */ ret = ulpi_write(ulpi, ULPI_SCRATCH, 0xaa); if (ret < 0) - return ret; + goto err; ret = ulpi_read(ulpi, ULPI_SCRATCH); if (ret < 0) diff --git a/drivers/usb/dwc3/dwc3-xilinx.c b/drivers/usb/dwc3/dwc3-xilinx.c index 8607d4c23283..0745e9f11b2e 100644 --- a/drivers/usb/dwc3/dwc3-xilinx.c +++ b/drivers/usb/dwc3/dwc3-xilinx.c @@ -13,6 +13,7 @@ #include <linux/of.h> #include <linux/platform_device.h> #include <linux/dma-mapping.h> +#include <linux/gpio/consumer.h> #include <linux/of_gpio.h> #include <linux/of_platform.h> #include <linux/pm_runtime.h> diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 789976567f9f..89dcfac01235 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1727,6 +1727,7 @@ static int __dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, bool int else if (!ret) dep->flags |= DWC3_EP_END_TRANSFER_PENDING; + dep->flags &= ~DWC3_EP_DELAY_STOP; return ret; } @@ -3732,8 +3733,10 @@ void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, if (dep->number <= 1 && dwc->ep0state != EP0_DATA_PHASE) return; + if (interrupt && (dep->flags & DWC3_EP_DELAY_STOP)) + return; + if (!(dep->flags & DWC3_EP_TRANSFER_STARTED) || - (dep->flags & DWC3_EP_DELAY_STOP) || (dep->flags & DWC3_EP_END_TRANSFER_PENDING)) return; diff --git a/drivers/usb/fotg210/fotg210-core.c b/drivers/usb/fotg210/fotg210-core.c index 8a54edf921ac..ee740a6da463 100644 --- a/drivers/usb/fotg210/fotg210-core.c +++ b/drivers/usb/fotg210/fotg210-core.c @@ -144,10 +144,7 @@ static struct platform_driver fotg210_driver = { static int __init fotg210_init(void) { - if (usb_disabled()) - return -ENODEV; 
- - if (IS_ENABLED(CONFIG_USB_FOTG210_HCD)) + if (IS_ENABLED(CONFIG_USB_FOTG210_HCD) && !usb_disabled()) fotg210_hcd_init(); return platform_driver_register(&fotg210_driver); } diff --git a/drivers/usb/fotg210/fotg210-udc.c b/drivers/usb/fotg210/fotg210-udc.c index 66e1b7ee3346..87cca81bf4ac 100644 --- a/drivers/usb/fotg210/fotg210-udc.c +++ b/drivers/usb/fotg210/fotg210-udc.c @@ -1201,6 +1201,8 @@ int fotg210_udc_probe(struct platform_device *pdev) dev_info(dev, "found and initialized PHY\n"); } + ret = -ENOMEM; + for (i = 0; i < FOTG210_MAX_NUM_EP; i++) { fotg210->ep[i] = kzalloc(sizeof(struct fotg210_ep), GFP_KERNEL); if (!fotg210->ep[i]) diff --git a/drivers/usb/host/xen-hcd.c b/drivers/usb/host/xen-hcd.c index de1b09158318..46fdab940092 100644 --- a/drivers/usb/host/xen-hcd.c +++ b/drivers/usb/host/xen-hcd.c @@ -1530,15 +1530,13 @@ static void xenhcd_backend_changed(struct xenbus_device *dev, } } -static int xenhcd_remove(struct xenbus_device *dev) +static void xenhcd_remove(struct xenbus_device *dev) { struct xenhcd_info *info = dev_get_drvdata(&dev->dev); struct usb_hcd *hcd = xenhcd_info_to_hcd(info); xenhcd_destroy_rings(info); usb_put_hcd(hcd); - - return 0; } static int xenhcd_probe(struct xenbus_device *dev, diff --git a/drivers/video/fbdev/xen-fbfront.c b/drivers/video/fbdev/xen-fbfront.c index 8752d389e382..d7f3e6281ce4 100644 --- a/drivers/video/fbdev/xen-fbfront.c +++ b/drivers/video/fbdev/xen-fbfront.c @@ -67,7 +67,7 @@ MODULE_PARM_DESC(video, "Video memory size in MB, width, height in pixels (default 2,800,600)"); static void xenfb_make_preferred_console(void); -static int xenfb_remove(struct xenbus_device *); +static void xenfb_remove(struct xenbus_device *); static void xenfb_init_shared_page(struct xenfb_info *, struct fb_info *); static int xenfb_connect_backend(struct xenbus_device *, struct xenfb_info *); static void xenfb_disconnect_backend(struct xenfb_info *); @@ -523,7 +523,7 @@ static int xenfb_resume(struct xenbus_device *dev) return 
xenfb_connect_backend(dev, info); } -static int xenfb_remove(struct xenbus_device *dev) +static void xenfb_remove(struct xenbus_device *dev) { struct xenfb_info *info = dev_get_drvdata(&dev->dev); @@ -538,8 +538,6 @@ static int xenfb_remove(struct xenbus_device *dev) vfree(info->gfns); vfree(info->fb); kfree(info); - - return 0; } static unsigned long vmalloc_to_gfn(void *address) diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index 28b2a1fa25ab..0d4f8f4f4948 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -1181,9 +1181,8 @@ static void pvcalls_back_changed(struct xenbus_device *dev, } } -static int pvcalls_back_remove(struct xenbus_device *dev) +static void pvcalls_back_remove(struct xenbus_device *dev) { - return 0; } static int pvcalls_back_uevent(struct xenbus_device *xdev, diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c index 1826e8e67125..d5d589bda243 100644 --- a/drivers/xen/pvcalls-front.c +++ b/drivers/xen/pvcalls-front.c @@ -225,6 +225,8 @@ again: return IRQ_HANDLED; } +static void free_active_ring(struct sock_mapping *map); + static void pvcalls_front_free_map(struct pvcalls_bedata *bedata, struct sock_mapping *map) { @@ -240,7 +242,7 @@ static void pvcalls_front_free_map(struct pvcalls_bedata *bedata, for (i = 0; i < (1 << PVCALLS_RING_ORDER); i++) gnttab_end_foreign_access(map->active.ring->ref[i], NULL); gnttab_end_foreign_access(map->active.ref, NULL); - free_page((unsigned long)map->active.ring); + free_active_ring(map); kfree(map); } @@ -1085,7 +1087,7 @@ static const struct xenbus_device_id pvcalls_front_ids[] = { { "" } }; -static int pvcalls_front_remove(struct xenbus_device *dev) +static void pvcalls_front_remove(struct xenbus_device *dev) { struct pvcalls_bedata *bedata; struct sock_mapping *map = NULL, *n; @@ -1121,7 +1123,6 @@ static int pvcalls_front_remove(struct xenbus_device *dev) kfree(bedata->ring.sring); kfree(bedata); xenbus_switch_state(dev, XenbusStateClosed); 
- return 0; } static int pvcalls_front_probe(struct xenbus_device *dev, diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index d171091eec12..b11e401f1b1e 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -716,14 +716,12 @@ out: return err; } -static int xen_pcibk_xenbus_remove(struct xenbus_device *dev) +static void xen_pcibk_xenbus_remove(struct xenbus_device *dev) { struct xen_pcibk_device *pdev = dev_get_drvdata(&dev->dev); if (pdev != NULL) free_pdev(pdev); - - return 0; } static const struct xenbus_device_id xen_pcibk_ids[] = { diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index 6106ed93817d..954188b0b858 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -1249,7 +1249,7 @@ static void scsiback_release_translation_entry(struct vscsibk_info *info) spin_unlock_irqrestore(&info->v2p_lock, flags); } -static int scsiback_remove(struct xenbus_device *dev) +static void scsiback_remove(struct xenbus_device *dev) { struct vscsibk_info *info = dev_get_drvdata(&dev->dev); @@ -1261,8 +1261,6 @@ static int scsiback_remove(struct xenbus_device *dev) gnttab_page_cache_shrink(&info->free_pages, 0); dev_set_drvdata(&dev->dev, NULL); - - return 0; } static int scsiback_probe(struct xenbus_device *dev, diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 7dcd59693a0c..d4ddb20d6732 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -13,6 +13,8 @@ #include "internal.h" #include "afs_cm.h" #include "protocol_yfs.h" +#define RXRPC_TRACE_ONLY_DEFINE_ENUMS +#include <trace/events/rxrpc.h> static int afs_deliver_cb_init_call_back_state(struct afs_call *); static int afs_deliver_cb_init_call_back_state3(struct afs_call *); @@ -191,7 +193,7 @@ static void afs_cm_destructor(struct afs_call *call) * Abort a service call from within an action function. 
*/ static void afs_abort_service_call(struct afs_call *call, u32 abort_code, int error, - const char *why) + enum rxrpc_abort_reason why) { rxrpc_kernel_abort_call(call->net->socket, call->rxcall, abort_code, error, why); @@ -469,7 +471,7 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work) if (memcmp(r, &call->net->uuid, sizeof(call->net->uuid)) == 0) afs_send_empty_reply(call); else - afs_abort_service_call(call, 1, 1, "K-1"); + afs_abort_service_call(call, 1, 1, afs_abort_probeuuid_negative); afs_put_call(call); _leave(""); diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index c62939e5ea1f..7817e2b860e5 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -13,6 +13,8 @@ #include "internal.h" #include "afs_cm.h" #include "protocol_yfs.h" +#define RXRPC_TRACE_ONLY_DEFINE_ENUMS +#include <trace/events/rxrpc.h> struct workqueue_struct *afs_async_calls; @@ -397,7 +399,8 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) error_do_abort: if (ret != -ECONNABORTED) { rxrpc_kernel_abort_call(call->net->socket, rxcall, - RX_USER_ABORT, ret, "KSD"); + RX_USER_ABORT, ret, + afs_abort_send_data_error); } else { len = 0; iov_iter_kvec(&msg.msg_iter, ITER_DEST, NULL, 0, 0); @@ -527,7 +530,8 @@ static void afs_deliver_to_call(struct afs_call *call) case -ENOTSUPP: abort_code = RXGEN_OPCODE; rxrpc_kernel_abort_call(call->net->socket, call->rxcall, - abort_code, ret, "KIV"); + abort_code, ret, + afs_abort_op_not_supported); goto local_abort; case -EIO: pr_err("kAFS: Call %u in bad state %u\n", @@ -542,12 +546,14 @@ static void afs_deliver_to_call(struct afs_call *call) if (state != AFS_CALL_CL_AWAIT_REPLY) abort_code = RXGEN_SS_UNMARSHAL; rxrpc_kernel_abort_call(call->net->socket, call->rxcall, - abort_code, ret, "KUM"); + abort_code, ret, + afs_abort_unmarshal_error); goto local_abort; default: abort_code = RX_CALL_DEAD; rxrpc_kernel_abort_call(call->net->socket, call->rxcall, - abort_code, ret, "KER"); + abort_code, ret, + 
afs_abort_general_error); goto local_abort; } } @@ -619,7 +625,8 @@ long afs_wait_for_call_to_complete(struct afs_call *call, /* Kill off the call if it's still live. */ _debug("call interrupted"); if (rxrpc_kernel_abort_call(call->net->socket, call->rxcall, - RX_USER_ABORT, -EINTR, "KWI")) + RX_USER_ABORT, -EINTR, + afs_abort_interrupted)) afs_set_call_complete(call, -EINTR, 0); } } @@ -836,7 +843,8 @@ void afs_send_empty_reply(struct afs_call *call) case -ENOMEM: _debug("oom"); rxrpc_kernel_abort_call(net->socket, call->rxcall, - RXGEN_SS_MARSHAL, -ENOMEM, "KOO"); + RXGEN_SS_MARSHAL, -ENOMEM, + afs_abort_oom); fallthrough; default: _leave(" [error]"); @@ -878,7 +886,8 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) if (n == -ENOMEM) { _debug("oom"); rxrpc_kernel_abort_call(net->socket, call->rxcall, - RXGEN_SS_MARSHAL, -ENOMEM, "KOO"); + RXGEN_SS_MARSHAL, -ENOMEM, + afs_abort_oom); } _leave(" [error]"); } @@ -900,6 +909,7 @@ int afs_extract_data(struct afs_call *call, bool want_more) ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, iter, &call->iov_len, want_more, &remote_abort, &call->service_id); + trace_afs_receive_data(call, call->iter, want_more, ret); if (ret == 0 || ret == -EAGAIN) return ret; diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c index b8fb7ef6b520..8affc88b0e0a 100644 --- a/fs/btrfs/bio.c +++ b/fs/btrfs/bio.c @@ -329,7 +329,16 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start, &map_length, &bioc, mirror_num); if (ret) goto out_counter_dec; - BUG_ON(mirror_num != bioc->mirror_num); + /* + * This happens when dev-replace is also running, and the + * mirror_num indicates the dev-replace target. + * + * In this case, we don't need to do anything, as the read + * error just means the replace progress hasn't reached our + * read range, and later replace routine would handle it well. 
+ */ + if (mirror_num != bioc->mirror_num) + goto out_counter_dec; } sector = bioc->stripes[bioc->mirror_num - 1].physical >> 9; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0888d484df80..8aeaada1fcae 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -530,6 +530,9 @@ static int validate_extent_buffer(struct extent_buffer *eb, } if (found_level != check->level) { + btrfs_err(fs_info, + "level verify failed on logical %llu mirror %u wanted %u found %u", + eb->start, eb->read_mirror, check->level, found_level); ret = -EIO; goto out; } @@ -3381,6 +3384,8 @@ out: /* * Do various sanity and dependency checks of different features. * + * @is_rw_mount: If the mount is read-write. + * * This is the place for less strict checks (like for subpage or artificial * feature dependencies). * @@ -3391,7 +3396,7 @@ out: * (space cache related) can modify on-disk format like free space tree and * screw up certain feature dependencies. */ -int btrfs_check_features(struct btrfs_fs_info *fs_info, struct super_block *sb) +int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount) { struct btrfs_super_block *disk_super = fs_info->super_copy; u64 incompat = btrfs_super_incompat_flags(disk_super); @@ -3430,7 +3435,7 @@ int btrfs_check_features(struct btrfs_fs_info *fs_info, struct super_block *sb) if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) incompat |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA; - if (compat_ro_unsupp && !sb_rdonly(sb)) { + if (compat_ro_unsupp && is_rw_mount) { btrfs_err(fs_info, "cannot mount read-write because of unknown compat_ro features (0x%llx)", compat_ro); @@ -3633,7 +3638,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device goto fail_alloc; } - ret = btrfs_check_features(fs_info, sb); + ret = btrfs_check_features(fs_info, !sb_rdonly(sb)); if (ret < 0) { err = ret; goto fail_alloc; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 363935cfc084..f2f295eb6103 100644 --- 
a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -50,7 +50,7 @@ int __cold open_ctree(struct super_block *sb, void __cold close_ctree(struct btrfs_fs_info *fs_info); int btrfs_validate_super(struct btrfs_fs_info *fs_info, struct btrfs_super_block *sb, int mirror_num); -int btrfs_check_features(struct btrfs_fs_info *fs_info, struct super_block *sb); +int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount); int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors); struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev); struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev, diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c index 9ae9cd1e7035..3c7766dfaa69 100644 --- a/fs/btrfs/extent-io-tree.c +++ b/fs/btrfs/extent-io-tree.c @@ -1551,7 +1551,7 @@ u64 count_range_bits(struct extent_io_tree *tree, u64 last = 0; int found = 0; - if (WARN_ON(search_end <= cur_start)) + if (WARN_ON(search_end < cur_start)) return 0; spin_lock(&tree->lock); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 892d78c1853c..72ba13b027a9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1713,6 +1713,11 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, BUG(); if (ret && insert_reserved) btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1); + if (ret < 0) + btrfs_err(trans->fs_info, +"failed to run delayed ref for logical %llu num_bytes %llu type %u action %u ref_mod %d: %d", + node->bytenr, node->num_bytes, node->type, + node->action, node->ref_mod, ret); return ret; } @@ -1954,8 +1959,6 @@ static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans, if (ret) { unselect_delayed_ref_head(delayed_refs, locked_ref); btrfs_put_delayed_ref(ref); - btrfs_debug(fs_info, "run_one_delayed_ref returned %d", - ret); return ret; } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 83dd3aa59663..9bd32daa9b9a 100644 --- 
a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -104,6 +104,15 @@ struct btrfs_bio_ctrl { btrfs_bio_end_io_t end_io_func; /* + * This is for metadata read, to provide the extra needed verification + * info. This has to be provided for submit_one_bio(), as + * submit_one_bio() can submit a bio if it ends at stripe boundary. If + * no such parent_check is provided, the metadata can hit false alert at + * endio time. + */ + struct btrfs_tree_parent_check *parent_check; + + /* * Tell writepage not to lock the state bits for this range, it still * does the unlocking. */ @@ -133,13 +142,24 @@ static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl) btrfs_bio(bio)->file_offset = page_offset(bv->bv_page) + bv->bv_offset; - if (!is_data_inode(&inode->vfs_inode)) + if (!is_data_inode(&inode->vfs_inode)) { + if (btrfs_op(bio) != BTRFS_MAP_WRITE) { + /* + * For metadata read, we should have the parent_check, + * and copy it to bbio for metadata verification. + */ + ASSERT(bio_ctrl->parent_check); + memcpy(&btrfs_bio(bio)->parent_check, + bio_ctrl->parent_check, + sizeof(struct btrfs_tree_parent_check)); + } btrfs_submit_metadata_bio(inode, bio, mirror_num); - else if (btrfs_op(bio) == BTRFS_MAP_WRITE) + } else if (btrfs_op(bio) == BTRFS_MAP_WRITE) { btrfs_submit_data_write_bio(inode, bio, mirror_num); - else + } else { btrfs_submit_data_read_bio(inode, bio, mirror_num, bio_ctrl->compress_type); + } /* The bio is owned by the end_io handler now */ bio_ctrl->bio = NULL; @@ -4829,6 +4849,7 @@ static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait, struct extent_state *cached_state = NULL; struct btrfs_bio_ctrl bio_ctrl = { .mirror_num = mirror_num, + .parent_check = check, }; int ret = 0; @@ -4878,7 +4899,6 @@ static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait, */ atomic_dec(&eb->io_pages); } - memcpy(&btrfs_bio(bio_ctrl.bio)->parent_check, check, sizeof(*check)); submit_one_bio(&bio_ctrl); if (ret || wait != WAIT_COMPLETE) { 
free_extent_state(cached_state); @@ -4905,6 +4925,7 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num, unsigned long num_reads = 0; struct btrfs_bio_ctrl bio_ctrl = { .mirror_num = mirror_num, + .parent_check = check, }; if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) @@ -4996,7 +5017,6 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num, } } - memcpy(&btrfs_bio(bio_ctrl.bio)->parent_check, check, sizeof(*check)); submit_one_bio(&bio_ctrl); if (ret || wait != WAIT_COMPLETE) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 91b00eb2440e..834bbcb91102 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -3354,7 +3354,7 @@ bool btrfs_find_delalloc_in_range(struct btrfs_inode *inode, u64 start, u64 end, bool search_io_tree = true; bool ret = false; - while (cur_offset < end) { + while (cur_offset <= end) { u64 delalloc_start; u64 delalloc_end; bool delalloc; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2ead7b1bdbaf..98a800b8bd43 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7092,7 +7092,7 @@ next: * Other members are not utilized for inline extents. 
*/ ASSERT(em->block_start == EXTENT_MAP_INLINE); - ASSERT(em->len = fs_info->sectorsize); + ASSERT(em->len == fs_info->sectorsize); ret = read_inline_extent(inode, path, page); if (ret < 0) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index d5de18d6517e..433ce221dc5c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1705,7 +1705,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) if (ret) goto restore; - ret = btrfs_check_features(fs_info, sb); + ret = btrfs_check_features(fs_info, !(*flags & SB_RDONLY)); if (ret < 0) goto restore; diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 4b159f97fe7b..f75ad432f375 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2913,7 +2913,7 @@ int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got while (true) { flags &= CEPH_FILE_MODE_MASK; - if (atomic_read(&fi->num_locks)) + if (vfs_inode_has_locks(inode)) flags |= CHECK_FILELOCK; _got = 0; ret = try_get_cap_refs(inode, need, want, endoff, diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index f3b461c708a8..9c8dc8a55e7e 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -32,24 +32,36 @@ void __init ceph_flock_init(void) static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src) { - struct ceph_file_info *fi = dst->fl_file->private_data; struct inode *inode = file_inode(dst->fl_file); atomic_inc(&ceph_inode(inode)->i_filelock_ref); - atomic_inc(&fi->num_locks); + dst->fl_u.ceph.inode = igrab(inode); } +/* + * Do not use the 'fl->fl_file' in release function, which + * is possibly already released by another thread. 
+ */ static void ceph_fl_release_lock(struct file_lock *fl) { - struct ceph_file_info *fi = fl->fl_file->private_data; - struct inode *inode = file_inode(fl->fl_file); - struct ceph_inode_info *ci = ceph_inode(inode); - atomic_dec(&fi->num_locks); + struct inode *inode = fl->fl_u.ceph.inode; + struct ceph_inode_info *ci; + + /* + * If inode is NULL it should be a request file_lock, + * nothing we can do. + */ + if (!inode) + return; + + ci = ceph_inode(inode); if (atomic_dec_and_test(&ci->i_filelock_ref)) { /* clear error when all locks are released */ spin_lock(&ci->i_ceph_lock); ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK; spin_unlock(&ci->i_ceph_lock); } + fl->fl_u.ceph.inode = NULL; + iput(inode); } static const struct file_lock_operations ceph_fl_lock_ops = { diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 30bdb391a0dc..0ed3be75bb9a 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -790,7 +790,6 @@ struct ceph_file_info { struct list_head rw_contexts; u32 filp_gen; - atomic_t num_locks; }; struct ceph_dir_file_info { diff --git a/fs/cifs/dfs.c b/fs/cifs/dfs.c index b541e68378f6..b64d20374b9c 100644 --- a/fs/cifs/dfs.c +++ b/fs/cifs/dfs.c @@ -327,8 +327,8 @@ static int update_server_fullpath(struct TCP_Server_Info *server, struct cifs_sb return rc; } -static int target_share_matches_server(struct TCP_Server_Info *server, const char *tcp_host, - size_t tcp_host_len, char *share, bool *target_match) +static int target_share_matches_server(struct TCP_Server_Info *server, char *share, + bool *target_match) { int rc = 0; const char *dfs_host; @@ -338,13 +338,16 @@ static int target_share_matches_server(struct TCP_Server_Info *server, const cha extract_unc_hostname(share, &dfs_host, &dfs_host_len); /* Check if hostnames or addresses match */ - if (dfs_host_len != tcp_host_len || strncasecmp(dfs_host, tcp_host, dfs_host_len) != 0) { - cifs_dbg(FYI, "%s: %.*s doesn't match %.*s\n", __func__, (int)dfs_host_len, - dfs_host, (int)tcp_host_len, tcp_host); + 
cifs_server_lock(server); + if (dfs_host_len != strlen(server->hostname) || + strncasecmp(dfs_host, server->hostname, dfs_host_len)) { + cifs_dbg(FYI, "%s: %.*s doesn't match %s\n", __func__, + (int)dfs_host_len, dfs_host, server->hostname); rc = match_target_ip(server, dfs_host, dfs_host_len, target_match); if (rc) cifs_dbg(VFS, "%s: failed to match target ip: %d\n", __func__, rc); } + cifs_server_unlock(server); return rc; } @@ -358,13 +361,9 @@ static int __tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *t struct cifs_ses *root_ses = CIFS_DFS_ROOT_SES(tcon->ses); struct cifs_tcon *ipc = root_ses->tcon_ipc; char *share = NULL, *prefix = NULL; - const char *tcp_host; - size_t tcp_host_len; struct dfs_cache_tgt_iterator *tit; bool target_match; - extract_unc_hostname(server->hostname, &tcp_host, &tcp_host_len); - tit = dfs_cache_get_tgt_iterator(tl); if (!tit) { rc = -ENOENT; @@ -387,8 +386,7 @@ static int __tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *t break; } - rc = target_share_matches_server(server, tcp_host, tcp_host_len, share, - &target_match); + rc = target_share_matches_server(server, share, &target_match); if (rc) break; if (!target_match) { @@ -401,8 +399,7 @@ static int __tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *t if (ipc->need_reconnect) { scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname); rc = ops->tree_connect(xid, ipc->ses, tree, ipc, cifs_sb->local_nls); - if (rc) - break; + cifs_dbg(FYI, "%s: reconnect ipc: %d\n", __func__, rc); } scnprintf(tree, MAX_TREE_SIZE, "\\%s", share); @@ -498,7 +495,9 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru } if (tcon->ipc) { + cifs_server_lock(server); scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname); + cifs_server_unlock(server); rc = ops->tree_connect(xid, tcon->ses, tree, tcon, nlsc); goto out; } diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 4d3c586785a5..2a19c7987c5b 100644 
--- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -1277,7 +1277,9 @@ int match_target_ip(struct TCP_Server_Info *server, if (rc < 0) return rc; + spin_lock(&server->srv_lock); *result = cifs_match_ipaddr((struct sockaddr *)&server->dstaddr, (struct sockaddr *)&ss); + spin_unlock(&server->srv_lock); cifs_dbg(FYI, "%s: ip addresses match: %u\n", __func__, *result); return 0; } diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 9e7d9f0baa18..0b842a07e157 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -292,9 +292,10 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) continue; } kref_get(&iface->refcount); + break; } - if (!list_entry_is_head(iface, &ses->iface_list, iface_head)) { + if (list_entry_is_head(iface, &ses->iface_list, iface_head)) { rc = 1; iface = NULL; cifs_dbg(FYI, "unable to find a suitable iface\n"); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index dc160de7a6de..e6bcd2baf446 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -530,7 +530,6 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, p = buf; spin_lock(&ses->iface_lock); - ses->iface_count = 0; /* * Go through iface_list and do kref_put to remove * any unused ifaces. 
ifaces in use will be removed @@ -540,6 +539,7 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, iface_head) { iface->is_active = 0; kref_put(&iface->refcount, release_iface); + ses->iface_count--; } spin_unlock(&ses->iface_lock); @@ -618,6 +618,7 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, /* just get a ref so that it doesn't get picked/freed */ iface->is_active = 1; kref_get(&iface->refcount); + ses->iface_count++; spin_unlock(&ses->iface_lock); goto next_iface; } else if (ret < 0) { @@ -4488,17 +4489,12 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, int num_rqst, /* copy pages form the old */ for (j = 0; j < npages; j++) { - char *dst, *src; unsigned int offset, len; rqst_page_get_length(new, j, &len, &offset); - dst = kmap_local_page(new->rq_pages[j]) + offset; - src = kmap_local_page(old->rq_pages[j]) + offset; - - memcpy(dst, src, len); - kunmap(new->rq_pages[j]); - kunmap(old->rq_pages[j]); + memcpy_page(new->rq_pages[j], offset, + old->rq_pages[j], offset, len); } } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index a5695748a89b..2c484d47c592 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -541,9 +541,10 @@ static void assemble_neg_contexts(struct smb2_negotiate_req *req, struct TCP_Server_Info *server, unsigned int *total_len) { - char *pneg_ctxt; - char *hostname = NULL; unsigned int ctxt_len, neg_context_count; + struct TCP_Server_Info *pserver; + char *pneg_ctxt; + char *hostname; if (*total_len > 200) { /* In case length corrupted don't want to overrun smb buffer */ @@ -574,8 +575,9 @@ assemble_neg_contexts(struct smb2_negotiate_req *req, * secondary channels don't have the hostname field populated * use the hostname field in the primary channel instead */ - hostname = CIFS_SERVER_IS_CHAN(server) ? - server->primary_server->hostname : server->hostname; + pserver = CIFS_SERVER_IS_CHAN(server) ? 
server->primary_server : server; + cifs_server_lock(pserver); + hostname = pserver->hostname; if (hostname && (hostname[0] != 0)) { ctxt_len = build_netname_ctxt((struct smb2_netname_neg_context *)pneg_ctxt, hostname); @@ -584,6 +586,7 @@ assemble_neg_contexts(struct smb2_negotiate_req *req, neg_context_count = 3; } else neg_context_count = 2; + cifs_server_unlock(pserver); build_posix_ctxt((struct smb2_posix_neg_context *)pneg_ctxt); *total_len += sizeof(struct smb2_posix_neg_context); diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 9c329a365e75..3a155c1d810e 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -458,15 +458,16 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) /* panic? */ return -EIO; + res = -EIO; if (HFS_I(main_inode)->cat_key.CName.len > HFS_NAMELEN) - return -EIO; + goto out; fd.search_key->cat = HFS_I(main_inode)->cat_key; if (hfs_brec_find(&fd)) - /* panic? */ goto out; if (S_ISDIR(main_inode->i_mode)) { - WARN_ON(fd.entrylength < sizeof(struct hfs_cat_dir)); + if (fd.entrylength < sizeof(struct hfs_cat_dir)) + goto out; hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_dir)); if (rec.type != HFS_CDR_DIR || @@ -479,6 +480,8 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) hfs_bnode_write(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_dir)); } else if (HFS_IS_RSRC(inode)) { + if (fd.entrylength < sizeof(struct hfs_cat_file)) + goto out; hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_file)); hfs_inode_write_fork(inode, rec.file.RExtRec, @@ -486,7 +489,8 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) hfs_bnode_write(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_file)); } else { - WARN_ON(fd.entrylength < sizeof(struct hfs_cat_file)); + if (fd.entrylength < sizeof(struct hfs_cat_file)) + goto out; hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_file)); if (rec.type != HFS_CDR_FIL || @@ 
-503,9 +507,10 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) hfs_bnode_write(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_file)); } + res = 0; out: hfs_find_exit(&fd); - return 0; + return res; } static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry, diff --git a/fs/ksmbd/auth.c b/fs/ksmbd/auth.c index 2a39ffb8423b..6e61b5bc7d86 100644 --- a/fs/ksmbd/auth.c +++ b/fs/ksmbd/auth.c @@ -322,7 +322,8 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob, dn_off = le32_to_cpu(authblob->DomainName.BufferOffset); dn_len = le16_to_cpu(authblob->DomainName.Length); - if (blob_len < (u64)dn_off + dn_len || blob_len < (u64)nt_off + nt_len) + if (blob_len < (u64)dn_off + dn_len || blob_len < (u64)nt_off + nt_len || + nt_len < CIFS_ENCPWD_SIZE) return -EINVAL; /* TODO : use domain name that imported from configuration file */ diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c index 12be8386446a..fd0a288af299 100644 --- a/fs/ksmbd/connection.c +++ b/fs/ksmbd/connection.c @@ -316,9 +316,12 @@ int ksmbd_conn_handler_loop(void *p) /* 4 for rfc1002 length field */ size = pdu_size + 4; - conn->request_buf = kvmalloc(size, GFP_KERNEL); + conn->request_buf = kvmalloc(size, + GFP_KERNEL | + __GFP_NOWARN | + __GFP_NORETRY); if (!conn->request_buf) - continue; + break; memcpy(conn->request_buf, hdr_buf, sizeof(hdr_buf)); if (!ksmbd_smb_request(conn)) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 14d7f3599c63..38fbda52e06f 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -1928,13 +1928,13 @@ int smb2_tree_connect(struct ksmbd_work *work) if (conn->posix_ext_supported) status.tree_conn->posix_extensions = true; -out_err1: rsp->StructureSize = cpu_to_le16(16); + inc_rfc1001_len(work->response_buf, 16); +out_err1: rsp->Capabilities = 0; rsp->Reserved = 0; /* default manual caching */ rsp->ShareFlags = SMB2_SHAREFLAG_MANUAL_CACHING; - inc_rfc1001_len(work->response_buf, 16); if 
(!IS_ERR(treename)) kfree(treename); @@ -1967,6 +1967,9 @@ out_err1: rsp->hdr.Status = STATUS_ACCESS_DENIED; } + if (status.ret != KSMBD_TREE_CONN_STATUS_OK) + smb2_set_err_rsp(work); + return rc; } diff --git a/fs/ksmbd/transport_tcp.c b/fs/ksmbd/transport_tcp.c index 63d55f543bd2..4c6bd0b69979 100644 --- a/fs/ksmbd/transport_tcp.c +++ b/fs/ksmbd/transport_tcp.c @@ -295,6 +295,7 @@ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig, struct msghdr ksmbd_msg; struct kvec *iov; struct ksmbd_conn *conn = KSMBD_TRANS(t)->conn; + int max_retry = 2; iov = get_conn_iovec(t, nr_segs); if (!iov) @@ -321,9 +322,11 @@ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig, } else if (conn->status == KSMBD_SESS_NEED_RECONNECT) { total_read = -EAGAIN; break; - } else if (length == -ERESTARTSYS || length == -EAGAIN) { + } else if ((length == -ERESTARTSYS || length == -EAGAIN) && + max_retry) { usleep_range(1000, 2000); length = 0; + max_retry--; continue; } else if (length <= 0) { total_read = -EAGAIN; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ea1ceffa1d3a..f7e4a88d5d92 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2957,12 +2957,14 @@ static u64 nfs_access_login_time(const struct task_struct *task, const struct cred *cred) { const struct task_struct *parent; + const struct cred *pcred; u64 ret; rcu_read_lock(); for (;;) { parent = rcu_dereference(task->real_parent); - if (parent == task || cred_fscmp(parent->cred, cred) != 0) + pcred = rcu_dereference(parent->cred); + if (parent == task || cred_fscmp(pcred, cred) != 0) break; task = parent; } @@ -3023,6 +3025,7 @@ static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cre * but do it without locking. 
*/ struct nfs_inode *nfsi = NFS_I(inode); + u64 login_time = nfs_access_login_time(current, cred); struct nfs_access_entry *cache; int err = -ECHILD; struct list_head *lh; @@ -3037,6 +3040,8 @@ static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cre cache = NULL; if (cache == NULL) goto out; + if ((s64)(login_time - cache->timestamp) > 0) + goto out; if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS)) goto out; *mask = cache->mask; diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index ad34a33b0737..4974cd18ca46 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -783,6 +783,12 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, return &fl->generic_hdr; } +static bool +filelayout_lseg_is_striped(const struct nfs4_filelayout_segment *flseg) +{ + return flseg->num_fh > 1; +} + /* * filelayout_pg_test(). Called by nfs_can_coalesce_requests() * @@ -803,6 +809,8 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, size = pnfs_generic_pg_test(pgio, prev, req); if (!size) return 0; + else if (!filelayout_lseg_is_striped(FILELAYOUT_LSEG(pgio->pg_lseg))) + return size; /* see if req and prev are in the same stripe */ if (prev) { diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 45b2c9e3f636..0ef070349014 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -1071,8 +1071,8 @@ nfsd_file_is_cached(struct inode *inode) static __be32 nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, - unsigned int may_flags, struct nfsd_file **pnf, - bool open, bool want_gc) + unsigned int may_flags, struct file *file, + struct nfsd_file **pnf, bool want_gc) { struct nfsd_file_lookup_key key = { .type = NFSD_FILE_KEY_FULL, @@ -1147,8 +1147,7 @@ wait_for_construction: status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); out: if (status == nfs_ok) { - if (open) - this_cpu_inc(nfsd_file_acquisitions); + 
this_cpu_inc(nfsd_file_acquisitions); *pnf = nf; } else { if (refcount_dec_and_test(&nf->nf_ref)) @@ -1158,20 +1157,23 @@ out: out_status: put_cred(key.cred); - if (open) - trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); + trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); return status; open_file: trace_nfsd_file_alloc(nf); nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode); if (nf->nf_mark) { - if (open) { + if (file) { + get_file(file); + nf->nf_file = file; + status = nfs_ok; + trace_nfsd_file_opened(nf, status); + } else { status = nfsd_open_verified(rqstp, fhp, may_flags, &nf->nf_file); trace_nfsd_file_open(nf, status); - } else - status = nfs_ok; + } } else status = nfserr_jukebox; /* @@ -1207,7 +1209,7 @@ __be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, true); + return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, true); } /** @@ -1228,28 +1230,30 @@ __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, false); + return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, false); } /** - * nfsd_file_create - Get a struct nfsd_file, do not open + * nfsd_file_acquire_opened - Get a struct nfsd_file using existing open file * @rqstp: the RPC transaction being executed * @fhp: the NFS filehandle of the file just created * @may_flags: NFSD_MAY_ settings for the file + * @file: cached, already-open file (may be NULL) * @pnf: OUT: new or found "struct nfsd_file" object * - * The nfsd_file_object returned by this API is reference-counted - * but not garbage-collected. The object is released immediately - * one RCU grace period after the final nfsd_file_put(). + * Acquire a nfsd_file object that is not GC'ed. 
If one doesn't already exist, + * and @file is non-NULL, use it to instantiate a new nfsd_file instead of + * opening a new one. * * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in * network byte order is returned. */ __be32 -nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, - unsigned int may_flags, struct nfsd_file **pnf) +nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, + unsigned int may_flags, struct file *file, + struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false, false); + return nfsd_file_do_acquire(rqstp, fhp, may_flags, file, pnf, false); } /* diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index b7efb2c3ddb1..41516a4263ea 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -60,7 +60,8 @@ __be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **nfp); __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **nfp); -__be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, - unsigned int may_flags, struct nfsd_file **nfp); +__be32 nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, + unsigned int may_flags, struct file *file, + struct nfsd_file **nfp); int nfsd_file_cache_stats_show(struct seq_file *m, void *v); #endif /* _FS_NFSD_FILECACHE_H */ diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index bd880d55f565..9b81d012666e 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -937,7 +937,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * the client wants us to do more in this compound: */ if (!nfsd4_last_compound_op(rqstp)) - __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); + clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); /* check stateid */ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, @@ -2607,12 +2607,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) 
cstate->minorversion = args->minorversion; fh_init(current_fh, NFS4_FHSIZE); fh_init(save_fh, NFS4_FHSIZE); - /* * Don't use the deferral mechanism for NFSv4; compounds make it * too hard to avoid non-idempotency problems. */ - __clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); + clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); /* * According to RFC3010, this takes precedence over all other errors. @@ -2734,7 +2733,7 @@ encode_op: out: cstate->status = status; /* Reset deferral mechanism for RPC deferrals */ - __set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); + set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); return rpc_success; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 7b2ee535ade8..4809ae0f0138 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5262,18 +5262,10 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, if (!fp->fi_fds[oflag]) { spin_unlock(&fp->fi_lock); - if (!open->op_filp) { - status = nfsd_file_acquire(rqstp, cur_fh, access, &nf); - if (status != nfs_ok) - goto out_put_access; - } else { - status = nfsd_file_create(rqstp, cur_fh, access, &nf); - if (status != nfs_ok) - goto out_put_access; - nf->nf_file = open->op_filp; - open->op_filp = NULL; - trace_nfsd_file_create(rqstp, access, nf); - } + status = nfsd_file_acquire_opened(rqstp, cur_fh, access, + open->op_filp, &nf); + if (status != nfs_ok) + goto out_put_access; spin_lock(&fp->fi_lock); if (!fp->fi_fds[oflag]) { diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index ebb4d02a42ce..97edb32be77f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2523,7 +2523,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) argp->rqstp->rq_cachetype = cachethis ? 
RC_REPLBUFF : RC_NOCACHE; if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack) - __clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags); + clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags); return true; } diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index a5570cf75f3f..9744443c3965 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -211,7 +211,7 @@ nfsd_proc_read(struct svc_rqst *rqstp) if (resp->status == nfs_ok) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) - __set_bit(RQ_DROPME, &rqstp->rq_flags); + set_bit(RQ_DROPME, &rqstp->rq_flags); return rpc_success; } @@ -246,7 +246,7 @@ nfsd_proc_write(struct svc_rqst *rqstp) if (resp->status == nfs_ok) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) - __set_bit(RQ_DROPME, &rqstp->rq_flags); + set_bit(RQ_DROPME, &rqstp->rq_flags); return rpc_success; } diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index c852ae8eaf37..8f9c82d9e075 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -981,43 +981,6 @@ TRACE_EVENT(nfsd_file_acquire, ) ); -TRACE_EVENT(nfsd_file_create, - TP_PROTO( - const struct svc_rqst *rqstp, - unsigned int may_flags, - const struct nfsd_file *nf - ), - - TP_ARGS(rqstp, may_flags, nf), - - TP_STRUCT__entry( - __field(const void *, nf_inode) - __field(const void *, nf_file) - __field(unsigned long, may_flags) - __field(unsigned long, nf_flags) - __field(unsigned long, nf_may) - __field(unsigned int, nf_ref) - __field(u32, xid) - ), - - TP_fast_assign( - __entry->nf_inode = nf->nf_inode; - __entry->nf_file = nf->nf_file; - __entry->may_flags = may_flags; - __entry->nf_flags = nf->nf_flags; - __entry->nf_may = nf->nf_may; - __entry->nf_ref = refcount_read(&nf->nf_ref); - __entry->xid = be32_to_cpu(rqstp->rq_xid); - ), - - TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p", - __entry->xid, __entry->nf_inode, - show_nfsd_may_flags(__entry->may_flags), - __entry->nf_ref, 
show_nf_flags(__entry->nf_flags), - show_nfsd_may_flags(__entry->nf_may), __entry->nf_file - ) -); - TRACE_EVENT(nfsd_file_insert_err, TP_PROTO( const struct svc_rqst *rqstp, @@ -1079,8 +1042,8 @@ TRACE_EVENT(nfsd_file_cons_err, ) ); -TRACE_EVENT(nfsd_file_open, - TP_PROTO(struct nfsd_file *nf, __be32 status), +DECLARE_EVENT_CLASS(nfsd_file_open_class, + TP_PROTO(const struct nfsd_file *nf, __be32 status), TP_ARGS(nf, status), TP_STRUCT__entry( __field(void *, nf_inode) /* cannot be dereferenced */ @@ -1104,6 +1067,17 @@ TRACE_EVENT(nfsd_file_open, __entry->nf_file) ) +#define DEFINE_NFSD_FILE_OPEN_EVENT(name) \ +DEFINE_EVENT(nfsd_file_open_class, name, \ + TP_PROTO( \ + const struct nfsd_file *nf, \ + __be32 status \ + ), \ + TP_ARGS(nf, status)) + +DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_open); +DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_opened); + TRACE_EVENT(nfsd_file_is_cached, TP_PROTO( const struct inode *inode, diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 1d7c2a812fc1..34e416327dd4 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -595,7 +595,7 @@ static void udf_do_extend_final_block(struct inode *inode, */ if (new_elen <= (last_ext->extLength & UDF_EXTENT_LENGTH_MASK)) return; - added_bytes = (last_ext->extLength & UDF_EXTENT_LENGTH_MASK) - new_elen; + added_bytes = new_elen - (last_ext->extLength & UDF_EXTENT_LENGTH_MASK); last_ext->extLength += added_bytes; UDF_I(inode)->i_lenExtents += added_bytes; @@ -684,7 +684,7 @@ static sector_t inode_getblk(struct inode *inode, sector_t block, struct kernel_lb_addr eloc, tmpeloc; int c = 1; loff_t lbcount = 0, b_off = 0; - udf_pblk_t newblocknum, newblock; + udf_pblk_t newblocknum, newblock = 0; sector_t offset = 0; int8_t etype; struct udf_inode_info *iinfo = UDF_I(inode); @@ -787,7 +787,6 @@ static sector_t inode_getblk(struct inode *inode, sector_t block, ret = udf_do_extend_file(inode, &prev_epos, laarr, hole_len); if (ret < 0) { *err = ret; - newblock = 0; goto out_free; } c = 0; @@ -852,7 +851,6 @@ static 
sector_t inode_getblk(struct inode *inode, sector_t block, goal, err); if (!newblocknum) { *err = -ENOSPC; - newblock = 0; goto out_free; } if (isBeyondEOF) diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 4c16c8c31fcb..35f574421670 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -4666,7 +4666,12 @@ xfs_btree_space_to_height( const unsigned int *limits, unsigned long long leaf_blocks) { - unsigned long long node_blocks = limits[1]; + /* + * The root btree block can have fewer than minrecs pointers in it + * because the tree might not be big enough to require that amount of + * fanout. Hence it has a minimum size of 2 pointers, not limits[1]. + */ + unsigned long long node_blocks = 2; unsigned long long blocks_left = leaf_blocks - 1; unsigned int height = 1; diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c index ad22a003f959..f3d328e4a440 100644 --- a/fs/xfs/xfs_extent_busy.c +++ b/fs/xfs/xfs_extent_busy.c @@ -236,6 +236,7 @@ xfs_extent_busy_update_extent( * */ busyp->bno = fend; + busyp->length = bend - fend; } else if (bbno < fbno) { /* * Case 8: diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index f35e2cee5265..ddeaccc04aec 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -1853,12 +1853,20 @@ xfs_inodegc_worker( struct xfs_inodegc, work); struct llist_node *node = llist_del_all(&gc->list); struct xfs_inode *ip, *n; + unsigned int nofs_flag; WRITE_ONCE(gc->items, 0); if (!node) return; + /* + * We can allocate memory here while doing writeback on behalf of + * memory reclaim. To avoid memory allocation deadlocks set the + * task-wide nofs context for the following operations. 
+ */ + nofs_flag = memalloc_nofs_save(); + ip = llist_entry(node, struct xfs_inode, i_gclist); trace_xfs_inodegc_worker(ip->i_mount, READ_ONCE(gc->shrinker_hits)); @@ -1867,6 +1875,8 @@ xfs_inodegc_worker( xfs_iflags_set(ip, XFS_INACTIVATING); xfs_inodegc_inactivate(ip); } + + memalloc_nofs_restore(nofs_flag); } /* diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 13f1b2add390..736510bc241b 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -754,7 +754,7 @@ xfs_bulkstat_fmt( static int xfs_bulk_ireq_setup( struct xfs_mount *mp, - struct xfs_bulk_ireq *hdr, + const struct xfs_bulk_ireq *hdr, struct xfs_ibulk *breq, void __user *ubuffer) { @@ -780,7 +780,7 @@ xfs_bulk_ireq_setup( switch (hdr->ino) { case XFS_BULK_IREQ_SPECIAL_ROOT: - hdr->ino = mp->m_sb.sb_rootino; + breq->startino = mp->m_sb.sb_rootino; break; default: return -EINVAL; diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 669c1bc5c3a7..fc1946f80a4a 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -83,7 +83,7 @@ xfs_iomap_valid( return true; } -const struct iomap_page_ops xfs_iomap_page_ops = { +static const struct iomap_page_ops xfs_iomap_page_ops = { .iomap_valid = xfs_iomap_valid, }; diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index ff53d40a2dae..e2c542f6dcd4 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -68,7 +68,7 @@ restart: while (1) { struct xfs_dquot *batch[XFS_DQ_LOOKUP_BATCH]; - int error = 0; + int error; int i; mutex_lock(&qi->qi_tree_lock); diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index fe46bce8cae6..5535778a98f9 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -416,8 +416,6 @@ xfs_reflink_fill_cow_hole( goto convert; } - ASSERT(cmap->br_startoff > imap->br_startoff); - /* Allocate the entire reservation as unwritten blocks. 
*/ nimaps = 1; error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount, diff --git a/include/drm/drm_plane_helper.h b/include/drm/drm_plane_helper.h index ff83d2621687..3a574e8cd22f 100644 --- a/include/drm/drm_plane_helper.h +++ b/include/drm/drm_plane_helper.h @@ -26,6 +26,7 @@ #include <linux/types.h> +struct drm_atomic_state; struct drm_crtc; struct drm_framebuffer; struct drm_modeset_acquire_ctx; diff --git a/include/linux/bio.h b/include/linux/bio.h index 22078a28d7cb..c1da63f6c808 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -475,6 +475,8 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); +extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, + struct bio *src, struct bvec_iter *src_iter); extern void bio_copy_data(struct bio *dst, struct bio *src); extern void bio_free_pages(struct bio *bio); void guard_bio_eod(struct bio *bio); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 301cf1cf4f2f..43d4e073b111 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1395,6 +1395,7 @@ struct block_device_operations { void (*swap_slot_free_notify) (struct block_device *, unsigned long); int (*report_zones)(struct gendisk *, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); + char *(*devnode)(struct gendisk *disk, umode_t *mode); /* returns the length of the identifier or a negative errno: */ int (*get_unique_id)(struct gendisk *disk, u8 id[16], enum blk_unique_id id_type); diff --git a/include/linux/fs.h b/include/linux/fs.h index 066555ad1bf8..c1769a2c5d70 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1119,6 +1119,9 @@ struct file_lock { int state; /* state of grant or error if -ve */ unsigned int debug_id; } afs; + struct { + struct inode *inode; + } ceph; } fl_u; } __randomize_layout; diff --git a/include/linux/io_uring_types.h 
b/include/linux/io_uring_types.h index dcd8a563ab52..128a67a40065 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -292,6 +292,8 @@ struct io_ring_ctx { struct { spinlock_t completion_lock; + bool poll_multi_queue; + /* * ->iopoll_list is protected by the ctx->uring_lock for * io_uring instances that don't use IORING_SETUP_SQPOLL. @@ -300,7 +302,6 @@ struct io_ring_ctx { */ struct io_wq_work_list iopoll_list; struct io_hash_table cancel_table; - bool poll_multi_queue; struct llist_head work_llist; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 7c393da396b1..b957b8f22a6b 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -318,7 +318,7 @@ struct mlx5_cmd { struct mlx5_cmd_debug dbg; struct cmd_msg_cache cache[MLX5_NUM_COMMAND_CACHES]; int checksum_disabled; - struct mlx5_cmd_stats *stats; + struct mlx5_cmd_stats stats[MLX5_CMD_OP_MAX]; }; struct mlx5_cmd_mailbox { diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 25765556223a..a3f8cdca90c8 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -7,7 +7,6 @@ #define __LINUX_MTD_SPI_NOR_H #include <linux/bitops.h> -#include <linux/mtd/cfi.h> #include <linux/mtd/mtd.h> #include <linux/spi/spi-mem.h> diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h new file mode 100644 index 000000000000..f9c5ac80d59b --- /dev/null +++ b/include/linux/pktcdvd.h @@ -0,0 +1,197 @@ +/* + * Copyright (C) 2000 Jens Axboe <axboe@suse.de> + * Copyright (C) 2001-2004 Peter Osterlund <petero2@telia.com> + * + * May be copied or modified under the terms of the GNU General Public + * License. See linux/COPYING for more information. + * + * Packet writing layer for ATAPI and SCSI CD-R, CD-RW, DVD-R, and + * DVD-RW devices. 
+ * + */ +#ifndef __PKTCDVD_H +#define __PKTCDVD_H + +#include <linux/blkdev.h> +#include <linux/completion.h> +#include <linux/cdrom.h> +#include <linux/kobject.h> +#include <linux/sysfs.h> +#include <linux/mempool.h> +#include <uapi/linux/pktcdvd.h> + +/* default bio write queue congestion marks */ +#define PKT_WRITE_CONGESTION_ON 10000 +#define PKT_WRITE_CONGESTION_OFF 9000 + + +struct packet_settings +{ + __u32 size; /* packet size in (512 byte) sectors */ + __u8 fp; /* fixed packets */ + __u8 link_loss; /* the rest is specified + * as per Mt Fuji */ + __u8 write_type; + __u8 track_mode; + __u8 block_mode; +}; + +/* + * Very crude stats for now + */ +struct packet_stats +{ + unsigned long pkt_started; + unsigned long pkt_ended; + unsigned long secs_w; + unsigned long secs_rg; + unsigned long secs_r; +}; + +struct packet_cdrw +{ + struct list_head pkt_free_list; + struct list_head pkt_active_list; + spinlock_t active_list_lock; /* Serialize access to pkt_active_list */ + struct task_struct *thread; + atomic_t pending_bios; +}; + +/* + * Switch to high speed reading after reading this many kilobytes + * with no interspersed writes. 
+ */ +#define HI_SPEED_SWITCH 512 + +struct packet_iosched +{ + atomic_t attention; /* Set to non-zero when queue processing is needed */ + int writing; /* Non-zero when writing, zero when reading */ + spinlock_t lock; /* Protecting read/write queue manipulations */ + struct bio_list read_queue; + struct bio_list write_queue; + sector_t last_write; /* The sector where the last write ended */ + int successive_reads; +}; + +/* + * 32 buffers of 2048 bytes + */ +#if (PAGE_SIZE % CD_FRAMESIZE) != 0 +#error "PAGE_SIZE must be a multiple of CD_FRAMESIZE" +#endif +#define PACKET_MAX_SIZE 128 +#define FRAMES_PER_PAGE (PAGE_SIZE / CD_FRAMESIZE) +#define PACKET_MAX_SECTORS (PACKET_MAX_SIZE * CD_FRAMESIZE >> 9) + +enum packet_data_state { + PACKET_IDLE_STATE, /* Not used at the moment */ + PACKET_WAITING_STATE, /* Waiting for more bios to arrive, so */ + /* we don't have to do as much */ + /* data gathering */ + PACKET_READ_WAIT_STATE, /* Waiting for reads to fill in holes */ + PACKET_WRITE_WAIT_STATE, /* Waiting for the write to complete */ + PACKET_RECOVERY_STATE, /* Recover after read/write errors */ + PACKET_FINISHED_STATE, /* After write has finished */ + + PACKET_NUM_STATES /* Number of possible states */ +}; + +/* + * Information needed for writing a single packet + */ +struct pktcdvd_device; + +struct packet_data +{ + struct list_head list; + + spinlock_t lock; /* Lock protecting state transitions and */ + /* orig_bios list */ + + struct bio_list orig_bios; /* Original bios passed to pkt_make_request */ + /* that will be handled by this packet */ + int write_size; /* Total size of all bios in the orig_bios */ + /* list, measured in number of frames */ + + struct bio *w_bio; /* The bio we will send to the real CD */ + /* device once we have all data for the */ + /* packet we are going to write */ + sector_t sector; /* First sector in this packet */ + int frames; /* Number of frames in this packet */ + + enum packet_data_state state; /* Current state */ + atomic_t 
run_sm; /* Incremented whenever the state */ + /* machine needs to be run */ + long sleep_time; /* Set this to non-zero to make the state */ + /* machine run after this many jiffies. */ + + atomic_t io_wait; /* Number of pending IO operations */ + atomic_t io_errors; /* Number of read/write errors during IO */ + + struct bio *r_bios[PACKET_MAX_SIZE]; /* bios to use during data gathering */ + struct page *pages[PACKET_MAX_SIZE / FRAMES_PER_PAGE]; + + int cache_valid; /* If non-zero, the data for the zone defined */ + /* by the sector variable is completely cached */ + /* in the pages[] vector. */ + + int id; /* ID number for debugging */ + struct pktcdvd_device *pd; +}; + +struct pkt_rb_node { + struct rb_node rb_node; + struct bio *bio; +}; + +struct packet_stacked_data +{ + struct bio *bio; /* Original read request bio */ + struct pktcdvd_device *pd; +}; +#define PSD_POOL_SIZE 64 + +struct pktcdvd_device +{ + struct block_device *bdev; /* dev attached */ + dev_t pkt_dev; /* our dev */ + char name[20]; + struct packet_settings settings; + struct packet_stats stats; + int refcnt; /* Open count */ + int write_speed; /* current write speed, kB/s */ + int read_speed; /* current read speed, kB/s */ + unsigned long offset; /* start offset */ + __u8 mode_offset; /* 0 / 8 */ + __u8 type; + unsigned long flags; + __u16 mmc3_profile; + __u32 nwa; /* next writable address */ + __u32 lra; /* last recorded address */ + struct packet_cdrw cdrw; + wait_queue_head_t wqueue; + + spinlock_t lock; /* Serialize access to bio_queue */ + struct rb_root bio_queue; /* Work queue of bios we need to handle */ + int bio_queue_size; /* Number of nodes in bio_queue */ + bool congested; /* Someone is waiting for bio_queue_size + * to drop. */ + sector_t current_sector; /* Keep track of where the elevator is */ + atomic_t scan_queue; /* Set to non-zero when pkt_handle_queue */ + /* needs to be run. 
*/ + mempool_t rb_pool; /* mempool for pkt_rb_node allocations */ + + struct packet_iosched iosched; + struct gendisk *disk; + + int write_congestion_off; + int write_congestion_on; + + struct device *dev; /* sysfs pktcdvd[0-7] dev */ + + struct dentry *dfs_d_root; /* debugfs: devname directory */ + struct dentry *dfs_f_info; /* debugfs: info file */ +}; + +#endif /* __PKTCDVD_H */ diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index cd188a527d16..3b35b6f6533a 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -92,6 +92,11 @@ extern ssize_t rpc_pipe_generic_upcall(struct file *, struct rpc_pipe_msg *, char __user *, size_t); extern int rpc_queue_upcall(struct rpc_pipe *, struct rpc_pipe_msg *); +/* returns true if the msg is in-flight, i.e., already eaten by the peer */ +static inline bool rpc_msg_is_inflight(const struct rpc_pipe_msg *msg) { + return (msg->copied != 0 && list_empty(&msg->list)); +} + struct rpc_clnt; extern struct dentry *rpc_create_client_dir(struct dentry *, const char *, struct rpc_clnt *); extern int rpc_remove_client_dir(struct rpc_clnt *); diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index d5a5ae926380..ba717eac0229 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -15,6 +15,7 @@ struct key; struct sock; struct socket; struct rxrpc_call; +enum rxrpc_abort_reason; enum rxrpc_interruptibility { RXRPC_INTERRUPTIBLE, /* Call is interruptible */ @@ -55,7 +56,7 @@ int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *, int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *, struct iov_iter *, size_t *, bool, u32 *, u16 *); bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, - u32, int, const char *); + u32, int, enum rxrpc_abort_reason); void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *); void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *, struct sockaddr_rxrpc *); diff --git 
a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index cab52b0f11d0..34c03707fb6e 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -236,6 +236,14 @@ enum { ISCSI_SESSION_FREE, }; +enum { + ISCSI_SESSION_TARGET_UNBOUND, + ISCSI_SESSION_TARGET_ALLOCATED, + ISCSI_SESSION_TARGET_SCANNED, + ISCSI_SESSION_TARGET_UNBINDING, + ISCSI_SESSION_TARGET_MAX, +}; + #define ISCSI_MAX_TARGET -1 struct iscsi_cls_session { @@ -264,6 +272,7 @@ struct iscsi_cls_session { */ pid_t creator; int state; + int target_state; /* session target bind state */ int sid; /* session id */ void *dd_data; /* LLD private data */ struct device dev; /* sysfs transport/container device */ diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 5f9dd7389536..283db0ea3db4 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -16,7 +16,107 @@ /* * Declare tracing information enums and their string mappings for display. 
*/ +#define rxrpc_abort_reasons \ + /* AFS errors */ \ + EM(afs_abort_general_error, "afs-error") \ + EM(afs_abort_interrupted, "afs-intr") \ + EM(afs_abort_oom, "afs-oom") \ + EM(afs_abort_op_not_supported, "afs-op-notsupp") \ + EM(afs_abort_probeuuid_negative, "afs-probeuuid-neg") \ + EM(afs_abort_send_data_error, "afs-send-data") \ + EM(afs_abort_unmarshal_error, "afs-unmarshal") \ + /* rxperf errors */ \ + EM(rxperf_abort_general_error, "rxperf-error") \ + EM(rxperf_abort_oom, "rxperf-oom") \ + EM(rxperf_abort_op_not_supported, "rxperf-op-notsupp") \ + EM(rxperf_abort_unmarshal_error, "rxperf-unmarshal") \ + /* RxKAD security errors */ \ + EM(rxkad_abort_1_short_check, "rxkad1-short-check") \ + EM(rxkad_abort_1_short_data, "rxkad1-short-data") \ + EM(rxkad_abort_1_short_encdata, "rxkad1-short-encdata") \ + EM(rxkad_abort_1_short_header, "rxkad1-short-hdr") \ + EM(rxkad_abort_2_short_check, "rxkad2-short-check") \ + EM(rxkad_abort_2_short_data, "rxkad2-short-data") \ + EM(rxkad_abort_2_short_header, "rxkad2-short-hdr") \ + EM(rxkad_abort_2_short_len, "rxkad2-short-len") \ + EM(rxkad_abort_bad_checksum, "rxkad2-bad-cksum") \ + EM(rxkad_abort_chall_key_expired, "rxkad-chall-key-exp") \ + EM(rxkad_abort_chall_level, "rxkad-chall-level") \ + EM(rxkad_abort_chall_no_key, "rxkad-chall-nokey") \ + EM(rxkad_abort_chall_short, "rxkad-chall-short") \ + EM(rxkad_abort_chall_version, "rxkad-chall-version") \ + EM(rxkad_abort_resp_bad_callid, "rxkad-resp-bad-callid") \ + EM(rxkad_abort_resp_bad_checksum, "rxkad-resp-bad-cksum") \ + EM(rxkad_abort_resp_bad_param, "rxkad-resp-bad-param") \ + EM(rxkad_abort_resp_call_ctr, "rxkad-resp-call-ctr") \ + EM(rxkad_abort_resp_call_state, "rxkad-resp-call-state") \ + EM(rxkad_abort_resp_key_expired, "rxkad-resp-key-exp") \ + EM(rxkad_abort_resp_key_rejected, "rxkad-resp-key-rej") \ + EM(rxkad_abort_resp_level, "rxkad-resp-level") \ + EM(rxkad_abort_resp_nokey, "rxkad-resp-nokey") \ + EM(rxkad_abort_resp_ooseq, "rxkad-resp-ooseq") \ + 
EM(rxkad_abort_resp_short, "rxkad-resp-short") \ + EM(rxkad_abort_resp_short_tkt, "rxkad-resp-short-tkt") \ + EM(rxkad_abort_resp_tkt_aname, "rxkad-resp-tk-aname") \ + EM(rxkad_abort_resp_tkt_expired, "rxkad-resp-tk-exp") \ + EM(rxkad_abort_resp_tkt_future, "rxkad-resp-tk-future") \ + EM(rxkad_abort_resp_tkt_inst, "rxkad-resp-tk-inst") \ + EM(rxkad_abort_resp_tkt_len, "rxkad-resp-tk-len") \ + EM(rxkad_abort_resp_tkt_realm, "rxkad-resp-tk-realm") \ + EM(rxkad_abort_resp_tkt_short, "rxkad-resp-tk-short") \ + EM(rxkad_abort_resp_tkt_sinst, "rxkad-resp-tk-sinst") \ + EM(rxkad_abort_resp_tkt_sname, "rxkad-resp-tk-sname") \ + EM(rxkad_abort_resp_unknown_tkt, "rxkad-resp-unknown-tkt") \ + EM(rxkad_abort_resp_version, "rxkad-resp-version") \ + /* rxrpc errors */ \ + EM(rxrpc_abort_call_improper_term, "call-improper-term") \ + EM(rxrpc_abort_call_reset, "call-reset") \ + EM(rxrpc_abort_call_sendmsg, "call-sendmsg") \ + EM(rxrpc_abort_call_sock_release, "call-sock-rel") \ + EM(rxrpc_abort_call_sock_release_tba, "call-sock-rel-tba") \ + EM(rxrpc_abort_call_timeout, "call-timeout") \ + EM(rxrpc_abort_no_service_key, "no-serv-key") \ + EM(rxrpc_abort_nomem, "nomem") \ + EM(rxrpc_abort_service_not_offered, "serv-not-offered") \ + EM(rxrpc_abort_shut_down, "shut-down") \ + EM(rxrpc_abort_unsupported_security, "unsup-sec") \ + EM(rxrpc_badmsg_bad_abort, "bad-abort") \ + EM(rxrpc_badmsg_bad_jumbo, "bad-jumbo") \ + EM(rxrpc_badmsg_short_ack, "short-ack") \ + EM(rxrpc_badmsg_short_ack_info, "short-ack-info") \ + EM(rxrpc_badmsg_short_hdr, "short-hdr") \ + EM(rxrpc_badmsg_unsupported_packet, "unsup-pkt") \ + EM(rxrpc_badmsg_zero_call, "zero-call") \ + EM(rxrpc_badmsg_zero_seq, "zero-seq") \ + EM(rxrpc_badmsg_zero_service, "zero-service") \ + EM(rxrpc_eproto_ackr_outside_window, "ackr-out-win") \ + EM(rxrpc_eproto_ackr_sack_overflow, "ackr-sack-over") \ + EM(rxrpc_eproto_ackr_short_sack, "ackr-short-sack") \ + EM(rxrpc_eproto_ackr_zero, "ackr-zero") \ + EM(rxrpc_eproto_bad_upgrade, 
"bad-upgrade") \ + EM(rxrpc_eproto_data_after_last, "data-after-last") \ + EM(rxrpc_eproto_different_last, "diff-last") \ + EM(rxrpc_eproto_early_reply, "early-reply") \ + EM(rxrpc_eproto_improper_term, "improper-term") \ + EM(rxrpc_eproto_no_client_call, "no-cl-call") \ + EM(rxrpc_eproto_no_client_conn, "no-cl-conn") \ + EM(rxrpc_eproto_no_service_call, "no-sv-call") \ + EM(rxrpc_eproto_reupgrade, "re-upgrade") \ + EM(rxrpc_eproto_rxnull_challenge, "rxnull-chall") \ + EM(rxrpc_eproto_rxnull_response, "rxnull-resp") \ + EM(rxrpc_eproto_tx_rot_last, "tx-rot-last") \ + EM(rxrpc_eproto_unexpected_ack, "unex-ack") \ + EM(rxrpc_eproto_unexpected_ackall, "unex-ackall") \ + EM(rxrpc_eproto_unexpected_implicit_end, "unex-impl-end") \ + EM(rxrpc_eproto_unexpected_reply, "unex-reply") \ + EM(rxrpc_eproto_wrong_security, "wrong-sec") \ + EM(rxrpc_recvmsg_excess_data, "recvmsg-excess") \ + EM(rxrpc_recvmsg_short_data, "recvmsg-short") \ + E_(rxrpc_sendmsg_late_send, "sendmsg-late") + #define rxrpc_call_poke_traces \ + EM(rxrpc_call_poke_abort, "Abort") \ + EM(rxrpc_call_poke_complete, "Compl") \ EM(rxrpc_call_poke_error, "Error") \ EM(rxrpc_call_poke_idle, "Idle") \ EM(rxrpc_call_poke_start, "Start") \ @@ -26,6 +126,7 @@ #define rxrpc_skb_traces \ EM(rxrpc_skb_eaten_by_unshare, "ETN unshare ") \ EM(rxrpc_skb_eaten_by_unshare_nomem, "ETN unshar-nm") \ + EM(rxrpc_skb_get_conn_secured, "GET conn-secd") \ EM(rxrpc_skb_get_conn_work, "GET conn-work") \ EM(rxrpc_skb_get_local_work, "GET locl-work") \ EM(rxrpc_skb_get_reject_work, "GET rej-work ") \ @@ -35,6 +136,7 @@ EM(rxrpc_skb_new_error_report, "NEW error-rpt") \ EM(rxrpc_skb_new_jumbo_subpacket, "NEW jumbo-sub") \ EM(rxrpc_skb_new_unshared, "NEW unshared ") \ + EM(rxrpc_skb_put_conn_secured, "PUT conn-secd") \ EM(rxrpc_skb_put_conn_work, "PUT conn-work") \ EM(rxrpc_skb_put_error_report, "PUT error-rep") \ EM(rxrpc_skb_put_input, "PUT input ") \ @@ -76,7 +178,6 @@ #define rxrpc_peer_traces \ EM(rxrpc_peer_free, "FREE ") \ 
EM(rxrpc_peer_get_accept, "GET accept ") \ - EM(rxrpc_peer_get_activate_call, "GET act-call") \ EM(rxrpc_peer_get_bundle, "GET bundle ") \ EM(rxrpc_peer_get_client_conn, "GET cln-conn") \ EM(rxrpc_peer_get_input, "GET input ") \ @@ -89,7 +190,6 @@ EM(rxrpc_peer_put_bundle, "PUT bundle ") \ EM(rxrpc_peer_put_call, "PUT call ") \ EM(rxrpc_peer_put_conn, "PUT conn ") \ - EM(rxrpc_peer_put_discard_tmp, "PUT disc-tmp") \ EM(rxrpc_peer_put_input, "PUT input ") \ EM(rxrpc_peer_put_input_error, "PUT inpt-err") \ E_(rxrpc_peer_put_keepalive, "PUT keepaliv") @@ -99,6 +199,7 @@ EM(rxrpc_bundle_get_client_call, "GET clt-call") \ EM(rxrpc_bundle_get_client_conn, "GET clt-conn") \ EM(rxrpc_bundle_get_service_conn, "GET svc-conn") \ + EM(rxrpc_bundle_put_call, "PUT call ") \ EM(rxrpc_bundle_put_conn, "PUT conn ") \ EM(rxrpc_bundle_put_discard, "PUT discard ") \ E_(rxrpc_bundle_new, "NEW ") @@ -109,14 +210,14 @@ EM(rxrpc_conn_get_call_input, "GET inp-call") \ EM(rxrpc_conn_get_conn_input, "GET inp-conn") \ EM(rxrpc_conn_get_idle, "GET idle ") \ - EM(rxrpc_conn_get_poke, "GET poke ") \ + EM(rxrpc_conn_get_poke_abort, "GET pk-abort") \ + EM(rxrpc_conn_get_poke_timer, "GET poke ") \ EM(rxrpc_conn_get_service_conn, "GET svc-conn") \ EM(rxrpc_conn_new_client, "NEW client ") \ EM(rxrpc_conn_new_service, "NEW service ") \ EM(rxrpc_conn_put_call, "PUT call ") \ EM(rxrpc_conn_put_call_input, "PUT inp-call") \ EM(rxrpc_conn_put_conn_input, "PUT inp-conn") \ - EM(rxrpc_conn_put_discard, "PUT discard ") \ EM(rxrpc_conn_put_discard_idle, "PUT disc-idl") \ EM(rxrpc_conn_put_local_dead, "PUT loc-dead") \ EM(rxrpc_conn_put_noreuse, "PUT noreuse ") \ @@ -124,10 +225,10 @@ EM(rxrpc_conn_put_service_reaped, "PUT svc-reap") \ EM(rxrpc_conn_put_unbundle, "PUT unbundle") \ EM(rxrpc_conn_put_unidle, "PUT unidle ") \ + EM(rxrpc_conn_put_work, "PUT work ") \ EM(rxrpc_conn_queue_challenge, "QUE chall ") \ EM(rxrpc_conn_queue_retry_work, "QUE retry-wk") \ EM(rxrpc_conn_queue_rx_work, "QUE rx-work ") \ - 
EM(rxrpc_conn_queue_timer, "QUE timer ") \ EM(rxrpc_conn_see_new_service_conn, "SEE new-svc ") \ EM(rxrpc_conn_see_reap_service, "SEE reap-svc") \ E_(rxrpc_conn_see_work, "SEE work ") @@ -138,16 +239,16 @@ EM(rxrpc_client_chan_activate, "ChActv") \ EM(rxrpc_client_chan_disconnect, "ChDisc") \ EM(rxrpc_client_chan_pass, "ChPass") \ - EM(rxrpc_client_chan_wait_failed, "ChWtFl") \ EM(rxrpc_client_cleanup, "Clean ") \ EM(rxrpc_client_discard, "Discar") \ - EM(rxrpc_client_duplicate, "Duplic") \ EM(rxrpc_client_exposed, "Expose") \ EM(rxrpc_client_replace, "Replac") \ + EM(rxrpc_client_queue_new_call, "Q-Call") \ EM(rxrpc_client_to_active, "->Actv") \ E_(rxrpc_client_to_idle, "->Idle") #define rxrpc_call_traces \ + EM(rxrpc_call_get_io_thread, "GET iothread") \ EM(rxrpc_call_get_input, "GET input ") \ EM(rxrpc_call_get_kernel_service, "GET krnl-srv") \ EM(rxrpc_call_get_notify_socket, "GET notify ") \ @@ -160,6 +261,7 @@ EM(rxrpc_call_new_prealloc_service, "NEW prealloc") \ EM(rxrpc_call_put_discard_prealloc, "PUT disc-pre") \ EM(rxrpc_call_put_discard_error, "PUT disc-err") \ + EM(rxrpc_call_put_io_thread, "PUT iothread") \ EM(rxrpc_call_put_input, "PUT input ") \ EM(rxrpc_call_put_kernel, "PUT kernel ") \ EM(rxrpc_call_put_poke, "PUT poke ") \ @@ -169,10 +271,12 @@ EM(rxrpc_call_put_sendmsg, "PUT sendmsg ") \ EM(rxrpc_call_put_unnotify, "PUT unnotify") \ EM(rxrpc_call_put_userid_exists, "PUT u-exists") \ + EM(rxrpc_call_put_userid, "PUT user-id ") \ EM(rxrpc_call_see_accept, "SEE accept ") \ EM(rxrpc_call_see_activate_client, "SEE act-clnt") \ EM(rxrpc_call_see_connect_failed, "SEE con-fail") \ EM(rxrpc_call_see_connected, "SEE connect ") \ + EM(rxrpc_call_see_disconnected, "SEE disconn ") \ EM(rxrpc_call_see_distribute_error, "SEE dist-err") \ EM(rxrpc_call_see_input, "SEE input ") \ EM(rxrpc_call_see_release, "SEE release ") \ @@ -376,6 +480,7 @@ #define EM(a, b) a, #define E_(a, b) a +enum rxrpc_abort_reason { rxrpc_abort_reasons } __mode(byte); enum 
rxrpc_bundle_trace { rxrpc_bundle_traces } __mode(byte); enum rxrpc_call_poke_trace { rxrpc_call_poke_traces } __mode(byte); enum rxrpc_call_trace { rxrpc_call_traces } __mode(byte); @@ -404,9 +509,13 @@ enum rxrpc_txqueue_trace { rxrpc_txqueue_traces } __mode(byte); */ #undef EM #undef E_ + +#ifndef RXRPC_TRACE_ONLY_DEFINE_ENUMS + #define EM(a, b) TRACE_DEFINE_ENUM(a); #define E_(a, b) TRACE_DEFINE_ENUM(a); +rxrpc_abort_reasons; rxrpc_bundle_traces; rxrpc_call_poke_traces; rxrpc_call_traces; @@ -657,14 +766,14 @@ TRACE_EVENT(rxrpc_rx_done, ); TRACE_EVENT(rxrpc_abort, - TP_PROTO(unsigned int call_nr, const char *why, u32 cid, u32 call_id, - rxrpc_seq_t seq, int abort_code, int error), + TP_PROTO(unsigned int call_nr, enum rxrpc_abort_reason why, + u32 cid, u32 call_id, rxrpc_seq_t seq, int abort_code, int error), TP_ARGS(call_nr, why, cid, call_id, seq, abort_code, error), TP_STRUCT__entry( __field(unsigned int, call_nr ) - __array(char, why, 4 ) + __field(enum rxrpc_abort_reason, why ) __field(u32, cid ) __field(u32, call_id ) __field(rxrpc_seq_t, seq ) @@ -673,8 +782,8 @@ TRACE_EVENT(rxrpc_abort, ), TP_fast_assign( - memcpy(__entry->why, why, 4); __entry->call_nr = call_nr; + __entry->why = why; __entry->cid = cid; __entry->call_id = call_id; __entry->abort_code = abort_code; @@ -685,7 +794,8 @@ TRACE_EVENT(rxrpc_abort, TP_printk("c=%08x %08x:%08x s=%u a=%d e=%d %s", __entry->call_nr, __entry->cid, __entry->call_id, __entry->seq, - __entry->abort_code, __entry->error, __entry->why) + __entry->abort_code, __entry->error, + __print_symbolic(__entry->why, rxrpc_abort_reasons)) ); TRACE_EVENT(rxrpc_call_complete, @@ -1521,30 +1631,6 @@ TRACE_EVENT(rxrpc_improper_term, __entry->abort_code) ); -TRACE_EVENT(rxrpc_rx_eproto, - TP_PROTO(struct rxrpc_call *call, rxrpc_serial_t serial, - const char *why), - - TP_ARGS(call, serial, why), - - TP_STRUCT__entry( - __field(unsigned int, call ) - __field(rxrpc_serial_t, serial ) - __field(const char *, why ) - ), - - 
TP_fast_assign( - __entry->call = call ? call->debug_id : 0; - __entry->serial = serial; - __entry->why = why; - ), - - TP_printk("c=%08x EPROTO %08x %s", - __entry->call, - __entry->serial, - __entry->why) - ); - TRACE_EVENT(rxrpc_connect_call, TP_PROTO(struct rxrpc_call *call), @@ -1842,6 +1928,8 @@ TRACE_EVENT(rxrpc_call_poked, #undef EM #undef E_ + +#endif /* RXRPC_TRACE_ONLY_DEFINE_ENUMS */ #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/include/uapi/linux/pktcdvd.h b/include/uapi/linux/pktcdvd.h new file mode 100644 index 000000000000..9cbb55d21c94 --- /dev/null +++ b/include/uapi/linux/pktcdvd.h @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2000 Jens Axboe <axboe@suse.de> + * Copyright (C) 2001-2004 Peter Osterlund <petero2@telia.com> + * + * May be copied or modified under the terms of the GNU General Public + * License. See linux/COPYING for more information. + * + * Packet writing layer for ATAPI and SCSI CD-R, CD-RW, DVD-R, and + * DVD-RW devices. + * + */ +#ifndef _UAPI__PKTCDVD_H +#define _UAPI__PKTCDVD_H + +#include <linux/types.h> + +/* + * 1 for normal debug messages, 2 is very verbose. 0 to turn it off. + */ +#define PACKET_DEBUG 1 + +#define MAX_WRITERS 8 + +#define PKT_RB_POOL_SIZE 512 + +/* + * How long we should hold a non-full packet before starting data gathering. + */ +#define PACKET_WAIT_TIME (HZ * 5 / 1000) + +/* + * use drive write caching -- we need deferred error handling to be + * able to successfully recover with this option (drive will return good + * status as soon as the cdb is validated). 
+ */ +#if defined(CONFIG_CDROM_PKTCDVD_WCACHE) +#define USE_WCACHING 1 +#else +#define USE_WCACHING 0 +#endif + +/* + * No user-servicable parts beyond this point -> + */ + +/* + * device types + */ +#define PACKET_CDR 1 +#define PACKET_CDRW 2 +#define PACKET_DVDR 3 +#define PACKET_DVDRW 4 + +/* + * flags + */ +#define PACKET_WRITABLE 1 /* pd is writable */ +#define PACKET_NWA_VALID 2 /* next writable address valid */ +#define PACKET_LRA_VALID 3 /* last recorded address valid */ +#define PACKET_MERGE_SEGS 4 /* perform segment merging to keep */ + /* underlying cdrom device happy */ + +/* + * Disc status -- from READ_DISC_INFO + */ +#define PACKET_DISC_EMPTY 0 +#define PACKET_DISC_INCOMPLETE 1 +#define PACKET_DISC_COMPLETE 2 +#define PACKET_DISC_OTHER 3 + +/* + * write type, and corresponding data block type + */ +#define PACKET_MODE1 1 +#define PACKET_MODE2 2 +#define PACKET_BLOCK_MODE1 8 +#define PACKET_BLOCK_MODE2 10 + +/* + * Last session/border status + */ +#define PACKET_SESSION_EMPTY 0 +#define PACKET_SESSION_INCOMPLETE 1 +#define PACKET_SESSION_RESERVED 2 +#define PACKET_SESSION_COMPLETE 3 + +#define PACKET_MCN "4a656e734178626f65323030300000" + +#undef PACKET_USE_LS + +#define PKT_CTRL_CMD_SETUP 0 +#define PKT_CTRL_CMD_TEARDOWN 1 +#define PKT_CTRL_CMD_STATUS 2 + +struct pkt_ctrl_command { + __u32 command; /* in: Setup, teardown, status */ + __u32 dev_index; /* in/out: Device index */ + __u32 dev; /* in/out: Device nr for cdrw device */ + __u32 pkt_dev; /* in/out: Device nr for packet device */ + __u32 num_devices; /* out: Largest device index + 1 */ + __u32 padding; /* Not used */ +}; + +/* + * packet ioctls + */ +#define PACKET_IOCTL_MAGIC ('X') +#define PACKET_CTRL_CMD _IOWR(PACKET_IOCTL_MAGIC, 1, struct pkt_ctrl_command) + + +#endif /* _UAPI__PKTCDVD_H */ diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index eaa932b99d8a..ad4fb4eab753 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -117,7 +117,7 @@ struct xenbus_driver { const 
struct xenbus_device_id *id); void (*otherend_changed)(struct xenbus_device *dev, enum xenbus_state backend_state); - int (*remove)(struct xenbus_device *dev); + void (*remove)(struct xenbus_device *dev); int (*suspend)(struct xenbus_device *dev); int (*resume)(struct xenbus_device *dev); int (*uevent)(struct xenbus_device *, struct kobj_uevent_env *); diff --git a/init/Kconfig b/init/Kconfig index 7e5c3ddc341d..0958846b005e 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -894,13 +894,17 @@ config CC_IMPLICIT_FALLTHROUGH default "-Wimplicit-fallthrough=5" if CC_IS_GCC && $(cc-option,-Wimplicit-fallthrough=5) default "-Wimplicit-fallthrough" if CC_IS_CLANG && $(cc-option,-Wunreachable-code-fallthrough) -# Currently, disable gcc-12 array-bounds globally. +# Currently, disable gcc-11,12 array-bounds globally. # We may want to target only particular configurations some day. +config GCC11_NO_ARRAY_BOUNDS + def_bool y + config GCC12_NO_ARRAY_BOUNDS def_bool y config CC_NO_ARRAY_BOUNDS bool + default y if CC_IS_GCC && GCC_VERSION >= 110000 && GCC_VERSION < 120000 && GCC11_NO_ARRAY_BOUNDS default y if CC_IS_GCC && GCC_VERSION >= 120000 && GCC_VERSION < 130000 && GCC12_NO_ARRAY_BOUNDS # diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index 6f1d0e5df23a..992dcd9f8c4c 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -1230,6 +1230,7 @@ static void io_wq_cancel_tw_create(struct io_wq *wq) worker = container_of(cb, struct io_worker, create_work); io_worker_cancel_cb(worker); + kfree(worker); } } diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 58ac13b69dc8..2ac1cd8d23ea 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -731,6 +731,8 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data, size_t ocq_size = sizeof(struct io_overflow_cqe); bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32); + lockdep_assert_held(&ctx->completion_lock); + if (is_cqe32) ocq_size += sizeof(struct io_uring_cqe); @@ -820,9 +822,6 @@ static bool 
io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res, { struct io_uring_cqe *cqe; - if (!ctx->task_complete) - lockdep_assert_held(&ctx->completion_lock); - ctx->cq_extra++; /* @@ -1236,13 +1235,18 @@ static void io_req_local_work_add(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; - if (!llist_add(&req->io_task_work.node, &ctx->work_llist)) + percpu_ref_get(&ctx->refs); + + if (!llist_add(&req->io_task_work.node, &ctx->work_llist)) { + percpu_ref_put(&ctx->refs); return; + } /* need it for the following io_cqring_wake() */ smp_mb__after_atomic(); if (unlikely(atomic_read(&req->task->io_uring->in_idle))) { io_move_task_work_from_local(ctx); + percpu_ref_put(&ctx->refs); return; } @@ -1252,6 +1256,7 @@ static void io_req_local_work_add(struct io_kiocb *req) if (ctx->has_evfd) io_eventfd_signal(ctx); __io_cqring_wake(ctx); + percpu_ref_put(&ctx->refs); } void __io_req_task_work_add(struct io_kiocb *req, bool allow_local) @@ -2465,7 +2470,7 @@ int io_run_task_work_sig(struct io_ring_ctx *ctx) /* when returns >0, the caller should retry */ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, - ktime_t timeout) + ktime_t *timeout) { int ret; unsigned long check_cq; @@ -2483,7 +2488,7 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, if (check_cq & BIT(IO_CHECK_CQ_DROPPED_BIT)) return -EBADR; } - if (!schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS)) + if (!schedule_hrtimeout(timeout, HRTIMER_MODE_ABS)) return -ETIME; /* @@ -2559,7 +2564,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, } prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq, TASK_INTERRUPTIBLE); - ret = io_cqring_wait_schedule(ctx, &iowq, timeout); + ret = io_cqring_wait_schedule(ctx, &iowq, &timeout); if (__io_cqring_events_user(ctx) >= min_events) break; cond_resched(); diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index e9f0d41ebb99..ab4b2a1c3b7e 100644 --- a/io_uring/io_uring.h +++ 
b/io_uring/io_uring.h @@ -79,6 +79,19 @@ bool __io_alloc_req_refill(struct io_ring_ctx *ctx); bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task, bool cancel_all); +#define io_lockdep_assert_cq_locked(ctx) \ + do { \ + if (ctx->flags & IORING_SETUP_IOPOLL) { \ + lockdep_assert_held(&ctx->uring_lock); \ + } else if (!ctx->task_complete) { \ + lockdep_assert_held(&ctx->completion_lock); \ + } else if (ctx->submitter_task->flags & PF_EXITING) { \ + lockdep_assert(current_work()); \ + } else { \ + lockdep_assert(current == ctx->submitter_task); \ + } \ + } while (0) + static inline void io_req_task_work_add(struct io_kiocb *req) { __io_req_task_work_add(req, true); @@ -92,6 +105,8 @@ void io_cq_unlock_post(struct io_ring_ctx *ctx); static inline struct io_uring_cqe *io_get_cqe_overflow(struct io_ring_ctx *ctx, bool overflow) { + io_lockdep_assert_cq_locked(ctx); + if (likely(ctx->cqe_cached < ctx->cqe_sentinel)) { struct io_uring_cqe *cqe = ctx->cqe_cached; diff --git a/kernel/kcsan/kcsan_test.c b/kernel/kcsan/kcsan_test.c index dcec1b743c69..a60c561724be 100644 --- a/kernel/kcsan/kcsan_test.c +++ b/kernel/kcsan/kcsan_test.c @@ -159,7 +159,7 @@ static bool __report_matches(const struct expect_report *r) const bool is_assert = (r->access[0].type | r->access[1].type) & KCSAN_ACCESS_ASSERT; bool ret = false; unsigned long flags; - typeof(observed.lines) expect; + typeof(*observed.lines) *expect; const char *end; char *cur; int i; @@ -168,6 +168,10 @@ static bool __report_matches(const struct expect_report *r) if (!report_available()) return false; + expect = kmalloc(sizeof(observed.lines), GFP_KERNEL); + if (WARN_ON(!expect)) + return false; + /* Generate expected report contents. 
*/ /* Title */ @@ -253,6 +257,7 @@ static bool __report_matches(const struct expect_report *r) strstr(observed.lines[2], expect[1]))); out: spin_unlock_irqrestore(&observed.lock, flags); + kfree(expect); return ret; } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 25b582b6ee5f..bb1ee6d7bdde 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2604,27 +2604,71 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) .user_mask = NULL, .flags = SCA_USER, /* clear the user requested mask */ }; + union cpumask_rcuhead { + cpumask_t cpumask; + struct rcu_head rcu; + }; __do_set_cpus_allowed(p, &ac); - kfree(ac.user_mask); + + /* + * Because this is called with p->pi_lock held, it is not possible + * to use kfree() here (when PREEMPT_RT=y), therefore punt to using + * kfree_rcu(). + */ + kfree_rcu((union cpumask_rcuhead *)ac.user_mask, rcu); +} + +static cpumask_t *alloc_user_cpus_ptr(int node) +{ + /* + * See do_set_cpus_allowed() above for the rcu_head usage. + */ + int size = max_t(int, cpumask_size(), sizeof(struct rcu_head)); + + return kmalloc_node(size, GFP_KERNEL, node); } int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node) { + cpumask_t *user_mask; unsigned long flags; - if (!src->user_cpus_ptr) + /* + * Always clear dst->user_cpus_ptr first as their user_cpus_ptr's + * may differ by now due to racing. + */ + dst->user_cpus_ptr = NULL; + + /* + * This check is racy and losing the race is a valid situation. + * It is not worth the extra overhead of taking the pi_lock on + * every fork/clone. 
+ */ + if (data_race(!src->user_cpus_ptr)) return 0; - dst->user_cpus_ptr = kmalloc_node(cpumask_size(), GFP_KERNEL, node); - if (!dst->user_cpus_ptr) + user_mask = alloc_user_cpus_ptr(node); + if (!user_mask) return -ENOMEM; - /* Use pi_lock to protect content of user_cpus_ptr */ + /* + * Use pi_lock to protect content of user_cpus_ptr + * + * Though unlikely, user_cpus_ptr can be reset to NULL by a concurrent + * do_set_cpus_allowed(). + */ raw_spin_lock_irqsave(&src->pi_lock, flags); - cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr); + if (src->user_cpus_ptr) { + swap(dst->user_cpus_ptr, user_mask); + cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr); + } raw_spin_unlock_irqrestore(&src->pi_lock, flags); + + if (unlikely(user_mask)) + kfree(user_mask); + return 0; } @@ -3581,6 +3625,11 @@ static inline bool rq_has_pinned_tasks(struct rq *rq) return false; } +static inline cpumask_t *alloc_user_cpus_ptr(int node) +{ + return NULL; +} + #endif /* !CONFIG_SMP */ static void @@ -5504,7 +5553,9 @@ void scheduler_tick(void) unsigned long thermal_pressure; u64 resched_latency; - arch_scale_freq_tick(); + if (housekeeping_cpu(cpu, HK_TYPE_TICK)) + arch_scale_freq_tick(); + sched_clock_tick(); rq_lock(rq, &rf); @@ -8239,8 +8290,8 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) if (retval) goto out_put_task; - user_mask = kmalloc(cpumask_size(), GFP_KERNEL); - if (!user_mask) { + user_mask = alloc_user_cpus_ptr(NUMA_NO_NODE); + if (IS_ENABLED(CONFIG_SMP) && !user_mask) { retval = -ENOMEM; goto out_put_task; } diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index 475ecceda768..5e2c2c26b3cc 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c @@ -18,7 +18,7 @@ #include "tick-internal.h" /** - * tick_program_event + * tick_program_event - program the CPU local timer device for the next event */ int tick_program_event(ktime_t expires, int force) { @@ -99,7 +99,7 @@ int tick_switch_to_oneshot(void 
(*handler)(struct clock_event_device *)) } /** - * tick_check_oneshot_mode - check whether the system is in oneshot mode + * tick_oneshot_mode_active - check whether the system is in oneshot mode * * returns 1 when either nohz or highres are enabled. otherwise 0. */ diff --git a/kernel/time/time.c b/kernel/time/time.c index 526257b3727c..f4198af60fee 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -462,7 +462,7 @@ struct __kernel_old_timeval ns_to_kernel_old_timeval(s64 nsec) EXPORT_SYMBOL(ns_to_kernel_old_timeval); /** - * set_normalized_timespec - set timespec sec and nsec parts and normalize + * set_normalized_timespec64 - set timespec sec and nsec parts and normalize * * @ts: pointer to timespec variable to be set * @sec: seconds to set @@ -526,7 +526,7 @@ struct timespec64 ns_to_timespec64(s64 nsec) EXPORT_SYMBOL(ns_to_timespec64); /** - * msecs_to_jiffies: - convert milliseconds to jiffies + * __msecs_to_jiffies: - convert milliseconds to jiffies * @m: time in milliseconds * * conversion is done as follows: @@ -541,12 +541,12 @@ EXPORT_SYMBOL(ns_to_timespec64); * handling any 32-bit overflows. * for the details see __msecs_to_jiffies() * - * msecs_to_jiffies() checks for the passed in value being a constant + * __msecs_to_jiffies() checks for the passed in value being a constant * via __builtin_constant_p() allowing gcc to eliminate most of the * code, __msecs_to_jiffies() is called if the value passed does not * allow constant folding and the actual conversion must be done at * runtime. 
- * the _msecs_to_jiffies helpers are the HZ dependent conversion + * The _msecs_to_jiffies helpers are the HZ dependent conversion * routines found in include/linux/jiffies.h */ unsigned long __msecs_to_jiffies(const unsigned int m) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index f72b9f1de178..5579ead449f2 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1590,10 +1590,10 @@ void __weak read_persistent_clock64(struct timespec64 *ts) /** * read_persistent_wall_and_boot_offset - Read persistent clock, and also offset * from the boot. + * @wall_time: current time as returned by persistent clock + * @boot_offset: offset that is defined as wall_time - boot_time * * Weak dummy function for arches that do not yet support it. - * @wall_time: - current time as returned by persistent clock - * @boot_offset: - offset that is defined as wall_time - boot_time * * The default function calculates offset based on the current value of * local_clock(). This way architectures that support sched_clock() but don't @@ -1701,7 +1701,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, } #if defined(CONFIG_PM_SLEEP) && defined(CONFIG_RTC_HCTOSYS_DEVICE) -/** +/* * We have three kinds of time sources to use for sleep time * injection, the preference order is: * 1) non-stop clocksource @@ -1722,7 +1722,7 @@ bool timekeeping_rtc_skipresume(void) return !suspend_timing_needed; } -/** +/* * 1) can be determined whether to use or not only when doing * timekeeping_resume() which is invoked after rtc_suspend(), * so we can't skip rtc_suspend() surely if system has 1). 
diff --git a/lib/scatterlist.c b/lib/scatterlist.c index a0ad2a7959b5..f72aa50c6654 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -476,7 +476,7 @@ int sg_alloc_append_table_from_pages(struct sg_append_table *sgt_append, /* Merge contiguous pages into the last SG */ prv_len = sgt_append->prv->length; last_pg = sg_page(sgt_append->prv); - while (n_pages && pages_are_mergeable(last_pg, pages[0])) { + while (n_pages && pages_are_mergeable(pages[0], last_pg)) { if (sgt_append->prv->length + PAGE_SIZE > max_segment) break; sgt_append->prv->length += PAGE_SIZE; diff --git a/mm/memblock.c b/mm/memblock.c index 511d4783dcf1..d036c7861310 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -836,7 +836,7 @@ void __init_memblock memblock_free(void *ptr, size_t size) * @base: phys starting address of the boot memory block * @size: size of the boot memory block in bytes * - * Free boot memory block previously allocated by memblock_alloc_xx() API. + * Free boot memory block previously allocated by memblock_phys_alloc_xx() API. * The freeing memory will not be released to the buddy allocator. 
*/ int __init_memblock memblock_phys_free(phys_addr_t base, phys_addr_t size) diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index 9630b1275557..82c7005ede65 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -305,13 +305,12 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv) kfree(priv); } -static int xen_9pfs_front_remove(struct xenbus_device *dev) +static void xen_9pfs_front_remove(struct xenbus_device *dev) { struct xen_9pfs_front_priv *priv = dev_get_drvdata(&dev->dev); dev_set_drvdata(&dev->dev, NULL); xen_9pfs_front_free(priv); - return 0; } static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev, diff --git a/net/core/gro.c b/net/core/gro.c index fd8c6a7e8d3e..506f83d715f8 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -505,8 +505,9 @@ found_ptype: NAPI_GRO_CB(skb)->count = 1; if (unlikely(skb_is_gso(skb))) { NAPI_GRO_CB(skb)->count = skb_shinfo(skb)->gso_segs; - /* Only support TCP at the moment. */ - if (!skb_is_gso_tcp(skb)) + /* Only support TCP and non DODGY users. */ + if (!skb_is_gso_tcp(skb) || + (skb_shinfo(skb)->gso_type & SKB_GSO_DODGY)) NAPI_GRO_CB(skb)->flush = 1; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index a06a9f847db5..ada087b50541 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -505,6 +505,7 @@ csum_copy_err: static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct raw6_sock *rp) { + struct ipv6_txoptions *opt; struct sk_buff *skb; int err = 0; int offset; @@ -522,6 +523,9 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, offset = rp->offset; total_len = inet_sk(sk)->cork.base.length; + opt = inet6_sk(sk)->cork.opt; + total_len -= opt ? 
opt->opt_flen : 0; + if (offset >= total_len - 1) { err = -EINVAL; ip6_flush_pending_frames(sk); diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile index e76d3459d78e..ac5caf5a48e1 100644 --- a/net/rxrpc/Makefile +++ b/net/rxrpc/Makefile @@ -10,6 +10,7 @@ rxrpc-y := \ call_accept.o \ call_event.o \ call_object.o \ + call_state.o \ conn_client.o \ conn_event.o \ conn_object.o \ diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 7ea576f6ba4b..ebbd4a1c3f86 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -155,10 +155,10 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) if (service_id) { write_lock(&local->services_lock); - if (rcu_access_pointer(local->service)) + if (local->service) goto service_in_use; rx->local = local; - rcu_assign_pointer(local->service, rx); + local->service = rx; write_unlock(&local->services_lock); rx->sk.sk_state = RXRPC_SERVER_BOUND; @@ -328,7 +328,6 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, mutex_unlock(&call->user_mutex); } - rxrpc_put_peer(cp.peer, rxrpc_peer_put_discard_tmp); _leave(" = %p", call); return call; } @@ -374,13 +373,17 @@ EXPORT_SYMBOL(rxrpc_kernel_end_call); * @sock: The socket the call is on * @call: The call to check * - * Allow a kernel service to find out whether a call is still alive - - * ie. whether it has completed. + * Allow a kernel service to find out whether a call is still alive - whether + * it has completed successfully and all received data has been consumed. 
*/ bool rxrpc_kernel_check_life(const struct socket *sock, const struct rxrpc_call *call) { - return call->state != RXRPC_CALL_COMPLETE; + if (!rxrpc_call_is_complete(call)) + return true; + if (call->completion != RXRPC_CALL_SUCCEEDED) + return false; + return !skb_queue_empty(&call->recvmsg_queue); } EXPORT_SYMBOL(rxrpc_kernel_check_life); @@ -872,9 +875,9 @@ static int rxrpc_release_sock(struct sock *sk) sk->sk_state = RXRPC_CLOSE; - if (rx->local && rcu_access_pointer(rx->local->service) == rx) { + if (rx->local && rx->local->service == rx) { write_lock(&rx->local->services_lock); - rcu_assign_pointer(rx->local->service, NULL); + rx->local->service = NULL; write_unlock(&rx->local->services_lock); } @@ -957,16 +960,9 @@ static const struct net_proto_family rxrpc_family_ops = { static int __init af_rxrpc_init(void) { int ret = -1; - unsigned int tmp; BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > sizeof_field(struct sk_buff, cb)); - get_random_bytes(&tmp, sizeof(tmp)); - tmp &= 0x3fffffff; - if (tmp == 0) - tmp = 1; - idr_set_cursor(&rxrpc_client_conn_ids, tmp); - ret = -ENOMEM; rxrpc_call_jar = kmem_cache_create( "rxrpc_call_jar", sizeof(struct rxrpc_call), 0, @@ -1062,7 +1058,6 @@ static void __exit af_rxrpc_exit(void) * are released. 
*/ rcu_barrier(); - rxrpc_destroy_client_conn_ids(); destroy_workqueue(rxrpc_workqueue); rxrpc_exit_security(); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 18092526d3c8..433060cade03 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -38,6 +38,7 @@ struct rxrpc_txbuf; enum rxrpc_skb_mark { RXRPC_SKB_MARK_PACKET, /* Received packet */ RXRPC_SKB_MARK_ERROR, /* Error notification */ + RXRPC_SKB_MARK_SERVICE_CONN_SECURED, /* Service connection response has been verified */ RXRPC_SKB_MARK_REJECT_BUSY, /* Reject with BUSY */ RXRPC_SKB_MARK_REJECT_ABORT, /* Reject with ABORT (code in skb->priority) */ }; @@ -75,13 +76,7 @@ struct rxrpc_net { bool live; - bool kill_all_client_conns; atomic_t nr_client_conns; - spinlock_t client_conn_cache_lock; /* Lock for ->*_client_conns */ - struct mutex client_conn_discard_lock; /* Prevent multiple discarders */ - struct list_head idle_client_conns; - struct work_struct client_conn_reaper; - struct timer_list client_conn_reap_timer; struct hlist_head local_endpoints; struct mutex local_mutex; /* Lock for ->local_endpoints */ @@ -202,6 +197,7 @@ struct rxrpc_host_header { * - max 48 bytes (struct sk_buff::cb) */ struct rxrpc_skb_priv { + struct rxrpc_connection *conn; /* Connection referred to (poke packet) */ u16 offset; /* Offset of data */ u16 len; /* Length of data */ u8 flags; @@ -262,13 +258,11 @@ struct rxrpc_security { /* respond to a challenge */ int (*respond_to_challenge)(struct rxrpc_connection *, - struct sk_buff *, - u32 *); + struct sk_buff *); /* verify a response */ int (*verify_response)(struct rxrpc_connection *, - struct sk_buff *, - u32 *); + struct sk_buff *); /* clear connection security */ void (*clear)(struct rxrpc_connection *); @@ -283,22 +277,34 @@ struct rxrpc_local { struct rcu_head rcu; atomic_t active_users; /* Number of users of the local endpoint */ refcount_t ref; /* Number of references to the structure */ - struct rxrpc_net *rxnet; /* The network ns in 
which this resides */ + struct net *net; /* The network namespace */ + struct rxrpc_net *rxnet; /* Our bits in the network namespace */ struct hlist_node link; struct socket *socket; /* my UDP socket */ struct task_struct *io_thread; struct completion io_thread_ready; /* Indication that the I/O thread started */ - struct rxrpc_sock __rcu *service; /* Service(s) listening on this endpoint */ + struct rxrpc_sock *service; /* Service(s) listening on this endpoint */ struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */ struct sk_buff_head rx_queue; /* Received packets */ + struct list_head conn_attend_q; /* Conns requiring immediate attention */ struct list_head call_attend_q; /* Calls requiring immediate attention */ + struct rb_root client_bundles; /* Client connection bundles by socket params */ spinlock_t client_bundles_lock; /* Lock for client_bundles */ + bool kill_all_client_conns; + struct list_head idle_client_conns; + struct timer_list client_conn_reap_timer; + unsigned long client_conn_flags; +#define RXRPC_CLIENT_CONN_REAP_TIMER 0 /* The client conn reap timer expired */ + spinlock_t lock; /* access lock */ rwlock_t services_lock; /* lock for services list */ int debug_id; /* debug ID for printks */ bool dead; bool service_closed; /* Service socket closed */ + struct idr conn_ids; /* List of connection IDs */ + struct list_head new_client_calls; /* Newly created client calls need connection */ + spinlock_t client_call_lock; /* Lock for ->new_client_calls */ struct sockaddr_rxrpc srx; /* local address */ }; @@ -356,7 +362,6 @@ struct rxrpc_conn_proto { struct rxrpc_conn_parameters { struct rxrpc_local *local; /* Representation of local endpoint */ - struct rxrpc_peer *peer; /* Remote endpoint */ struct key *key; /* Security details */ bool exclusive; /* T if conn is exclusive */ bool upgrade; /* T if service ID can be upgraded */ @@ -365,10 +370,21 @@ struct rxrpc_conn_parameters { }; /* + * Call completion condition (state == 
RXRPC_CALL_COMPLETE). + */ +enum rxrpc_call_completion { + RXRPC_CALL_SUCCEEDED, /* - Normal termination */ + RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */ + RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */ + RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */ + RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */ + NR__RXRPC_CALL_COMPLETIONS +}; + +/* * Bits in the connection flags. */ enum rxrpc_conn_flag { - RXRPC_CONN_HAS_IDR, /* Has a client conn ID assigned */ RXRPC_CONN_IN_SERVICE_CONNS, /* Conn is in peer->service_conns */ RXRPC_CONN_DONT_REUSE, /* Don't reuse this connection */ RXRPC_CONN_PROBING_FOR_UPGRADE, /* Probing for service upgrade */ @@ -388,6 +404,7 @@ enum rxrpc_conn_flag { */ enum rxrpc_conn_event { RXRPC_CONN_EV_CHALLENGE, /* Send challenge packet */ + RXRPC_CONN_EV_ABORT_CALLS, /* Abort attached calls */ }; /* @@ -395,13 +412,13 @@ enum rxrpc_conn_event { */ enum rxrpc_conn_proto_state { RXRPC_CONN_UNUSED, /* Connection not yet attempted */ + RXRPC_CONN_CLIENT_UNSECURED, /* Client connection needs security init */ RXRPC_CONN_CLIENT, /* Client connection */ RXRPC_CONN_SERVICE_PREALLOC, /* Service connection preallocation */ RXRPC_CONN_SERVICE_UNSECURED, /* Service unsecured connection */ RXRPC_CONN_SERVICE_CHALLENGING, /* Service challenging for security */ RXRPC_CONN_SERVICE, /* Service secured connection */ - RXRPC_CONN_REMOTELY_ABORTED, /* Conn aborted by peer */ - RXRPC_CONN_LOCALLY_ABORTED, /* Conn aborted locally */ + RXRPC_CONN_ABORTED, /* Conn aborted */ RXRPC_CONN__NR_STATES }; @@ -412,17 +429,16 @@ struct rxrpc_bundle { struct rxrpc_local *local; /* Representation of local endpoint */ struct rxrpc_peer *peer; /* Remote endpoint */ struct key *key; /* Security details */ + const struct rxrpc_security *security; /* applied security module */ refcount_t ref; atomic_t active; /* Number of active users */ unsigned int debug_id; u32 security_level; /* Security level selected */ 
u16 service_id; /* Service ID for this connection */ bool try_upgrade; /* True if the bundle is attempting upgrade */ - bool alloc_conn; /* True if someone's getting a conn */ bool exclusive; /* T if conn is exclusive */ bool upgrade; /* T if service ID can be upgraded */ - short alloc_error; /* Error from last conn allocation */ - spinlock_t channel_lock; + unsigned short alloc_error; /* Error from last conn allocation */ struct rb_node local_node; /* Node in local->client_conns */ struct list_head waiting_calls; /* Calls waiting for channels */ unsigned long avail_chans; /* Mask of available channels */ @@ -440,6 +456,7 @@ struct rxrpc_connection { struct rxrpc_peer *peer; /* Remote endpoint */ struct rxrpc_net *rxnet; /* Network namespace to which call belongs */ struct key *key; /* Security details */ + struct list_head attend_link; /* Link in local->conn_attend_q */ refcount_t ref; atomic_t active; /* Active count for service conns */ @@ -449,7 +466,7 @@ struct rxrpc_connection { unsigned char act_chans; /* Mask of active channels */ struct rxrpc_channel { unsigned long final_ack_at; /* Time at which to issue final ACK */ - struct rxrpc_call __rcu *call; /* Active call */ + struct rxrpc_call *call; /* Active call */ unsigned int call_debug_id; /* call->debug_id */ u32 call_id; /* ID of current call */ u32 call_counter; /* Call ID counter */ @@ -470,6 +487,7 @@ struct rxrpc_connection { struct list_head link; /* link in master connection list */ struct sk_buff_head rx_queue; /* received conn-level packets */ + struct mutex security_lock; /* Lock for security management */ const struct rxrpc_security *security; /* applied security module */ union { struct { @@ -483,7 +501,8 @@ struct rxrpc_connection { unsigned long idle_timestamp; /* Time at which last became idle */ spinlock_t state_lock; /* state-change lock */ enum rxrpc_conn_proto_state state; /* current state of connection */ - u32 abort_code; /* Abort code of connection abort */ + enum 
rxrpc_call_completion completion; /* Completion condition */ + s32 abort_code; /* Abort code of connection abort */ int debug_id; /* debug ID for printks */ atomic_t serial; /* packet serial number counter */ unsigned int hi_serial; /* highest serial number received */ @@ -527,7 +546,8 @@ enum rxrpc_call_flag { RXRPC_CALL_KERNEL, /* The call was made by the kernel */ RXRPC_CALL_UPGRADE, /* Service upgrade was requested for the call */ RXRPC_CALL_EXCLUSIVE, /* The call uses a once-only connection */ - RXRPC_CALL_RX_IS_IDLE, /* Reception is idle - send an ACK */ + RXRPC_CALL_RX_IS_IDLE, /* recvmsg() is idle - send an ACK */ + RXRPC_CALL_RECVMSG_READ_ALL, /* recvmsg() read all of the received data */ }; /* @@ -558,18 +578,6 @@ enum rxrpc_call_state { }; /* - * Call completion condition (state == RXRPC_CALL_COMPLETE). - */ -enum rxrpc_call_completion { - RXRPC_CALL_SUCCEEDED, /* - Normal termination */ - RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */ - RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */ - RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */ - RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */ - NR__RXRPC_CALL_COMPLETIONS -}; - -/* * Call Tx congestion management modes. 
*/ enum rxrpc_congest_mode { @@ -587,6 +595,7 @@ enum rxrpc_congest_mode { struct rxrpc_call { struct rcu_head rcu; struct rxrpc_connection *conn; /* connection carrying call */ + struct rxrpc_bundle *bundle; /* Connection bundle to use */ struct rxrpc_peer *peer; /* Peer record for remote address */ struct rxrpc_local *local; /* Representation of local endpoint */ struct rxrpc_sock __rcu *socket; /* socket responsible */ @@ -609,7 +618,7 @@ struct rxrpc_call { struct work_struct destroyer; /* In-process-context destroyer */ rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ struct list_head link; /* link in master call list */ - struct list_head chan_wait_link; /* Link in conn->bundle->waiting_calls */ + struct list_head wait_link; /* Link in local->new_client_calls */ struct hlist_node error_link; /* link in error distribution list */ struct list_head accept_link; /* Link in rx->acceptq */ struct list_head recvmsg_link; /* Link in rx->recvmsg_q */ @@ -623,10 +632,13 @@ struct rxrpc_call { unsigned long flags; unsigned long events; spinlock_t notify_lock; /* Kernel notification lock */ - rwlock_t state_lock; /* lock for state transition */ - u32 abort_code; /* Local/remote abort code */ + unsigned int send_abort_why; /* Why the abort [enum rxrpc_abort_reason] */ + s32 send_abort; /* Abort code to be sent */ + short send_abort_err; /* Error to be associated with the abort */ + rxrpc_seq_t send_abort_seq; /* DATA packet that incurred the abort (or 0) */ + s32 abort_code; /* Local/remote abort code */ int error; /* Local error incurred */ - enum rxrpc_call_state state; /* current state of call */ + enum rxrpc_call_state _state; /* Current state of call (needs barrier) */ enum rxrpc_call_completion completion; /* Call completion condition */ refcount_t ref; u8 security_ix; /* Security type */ @@ -812,9 +824,11 @@ extern struct workqueue_struct *rxrpc_workqueue; */ int rxrpc_service_prealloc(struct rxrpc_sock *, gfp_t); void 
rxrpc_discard_prealloc(struct rxrpc_sock *); -int rxrpc_new_incoming_call(struct rxrpc_local *, struct rxrpc_peer *, - struct rxrpc_connection *, struct sockaddr_rxrpc *, - struct sk_buff *); +bool rxrpc_new_incoming_call(struct rxrpc_local *local, + struct rxrpc_peer *peer, + struct rxrpc_connection *conn, + struct sockaddr_rxrpc *peer_srx, + struct sk_buff *skb); void rxrpc_accept_incoming_calls(struct rxrpc_local *); int rxrpc_user_charge_accept(struct rxrpc_sock *, unsigned long); @@ -834,7 +848,7 @@ void rxrpc_reduce_call_timer(struct rxrpc_call *call, unsigned long now, enum rxrpc_timer_trace why); -void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb); +bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb); /* * call_object.c @@ -851,6 +865,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, struct sockaddr_rxrpc *, struct rxrpc_call_params *, gfp_t, unsigned int); +void rxrpc_start_call_timer(struct rxrpc_call *call); void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *, struct sk_buff *); void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *); @@ -873,32 +888,88 @@ static inline bool rxrpc_is_client_call(const struct rxrpc_call *call) } /* + * call_state.c + */ +bool rxrpc_set_call_completion(struct rxrpc_call *call, + enum rxrpc_call_completion compl, + u32 abort_code, + int error); +bool rxrpc_call_completed(struct rxrpc_call *call); +bool rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq, + u32 abort_code, int error, enum rxrpc_abort_reason why); +void rxrpc_prefail_call(struct rxrpc_call *call, enum rxrpc_call_completion compl, + int error); + +static inline void rxrpc_set_call_state(struct rxrpc_call *call, + enum rxrpc_call_state state) +{ + /* Order write of completion info before write of ->state. 
*/ + smp_store_release(&call->_state, state); + wake_up(&call->waitq); +} + +static inline enum rxrpc_call_state __rxrpc_call_state(const struct rxrpc_call *call) +{ + return call->_state; /* Only inside I/O thread */ +} + +static inline bool __rxrpc_call_is_complete(const struct rxrpc_call *call) +{ + return __rxrpc_call_state(call) == RXRPC_CALL_COMPLETE; +} + +static inline enum rxrpc_call_state rxrpc_call_state(const struct rxrpc_call *call) +{ + /* Order read ->state before read of completion info. */ + return smp_load_acquire(&call->_state); +} + +static inline bool rxrpc_call_is_complete(const struct rxrpc_call *call) +{ + return rxrpc_call_state(call) == RXRPC_CALL_COMPLETE; +} + +static inline bool rxrpc_call_has_failed(const struct rxrpc_call *call) +{ + return rxrpc_call_is_complete(call) && call->completion != RXRPC_CALL_SUCCEEDED; +} + +/* * conn_client.c */ extern unsigned int rxrpc_reap_client_connections; extern unsigned long rxrpc_conn_idle_client_expiry; extern unsigned long rxrpc_conn_idle_client_fast_expiry; -extern struct idr rxrpc_client_conn_ids; -void rxrpc_destroy_client_conn_ids(void); +void rxrpc_purge_client_connections(struct rxrpc_local *local); struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *, enum rxrpc_bundle_trace); void rxrpc_put_bundle(struct rxrpc_bundle *, enum rxrpc_bundle_trace); -int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_call *, - struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, - gfp_t); +int rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t gfp); +void rxrpc_connect_client_calls(struct rxrpc_local *local); void rxrpc_expose_client_call(struct rxrpc_call *); void rxrpc_disconnect_client_call(struct rxrpc_bundle *, struct rxrpc_call *); +void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle); void rxrpc_put_client_conn(struct rxrpc_connection *, enum rxrpc_conn_trace); -void rxrpc_discard_expired_client_conns(struct work_struct *); -void rxrpc_destroy_all_client_connections(struct 
rxrpc_net *); +void rxrpc_discard_expired_client_conns(struct rxrpc_local *local); void rxrpc_clean_up_local_conns(struct rxrpc_local *); /* * conn_event.c */ +void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, struct sk_buff *skb, + unsigned int channel); +int rxrpc_abort_conn(struct rxrpc_connection *conn, struct sk_buff *skb, + s32 abort_code, int err, enum rxrpc_abort_reason why); void rxrpc_process_connection(struct work_struct *); void rxrpc_process_delayed_final_acks(struct rxrpc_connection *, bool); -int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb); +bool rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb); +void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb); + +static inline bool rxrpc_is_conn_aborted(const struct rxrpc_connection *conn) +{ + /* Order reading the abort info after the state check. */ + return smp_load_acquire(&conn->state) == RXRPC_CONN_ABORTED; +} /* * conn_object.c @@ -906,6 +977,7 @@ int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb); extern unsigned int rxrpc_connection_expiry; extern unsigned int rxrpc_closed_conn_expiry; +void rxrpc_poke_conn(struct rxrpc_connection *conn, enum rxrpc_conn_trace why); struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *, gfp_t); struct rxrpc_connection *rxrpc_find_client_connection_rcu(struct rxrpc_local *, struct sockaddr_rxrpc *, @@ -961,12 +1033,19 @@ void rxrpc_implicit_end_call(struct rxrpc_call *, struct sk_buff *); */ int rxrpc_encap_rcv(struct sock *, struct sk_buff *); void rxrpc_error_report(struct sock *); +bool rxrpc_direct_abort(struct sk_buff *skb, enum rxrpc_abort_reason why, + s32 abort_code, int err); int rxrpc_io_thread(void *data); static inline void rxrpc_wake_up_io_thread(struct rxrpc_local *local) { wake_up_process(local->io_thread); } +static inline bool rxrpc_protocol_error(struct sk_buff *skb, enum rxrpc_abort_reason why) +{ + return 
rxrpc_direct_abort(skb, why, RX_PROTOCOL_ERROR, -EPROTO); +} + /* * insecure.c */ @@ -1048,6 +1127,7 @@ static inline struct rxrpc_net *rxrpc_net(struct net *net) int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb); int rxrpc_send_abort_packet(struct rxrpc_call *); int rxrpc_send_data_packet(struct rxrpc_call *, struct rxrpc_txbuf *); +void rxrpc_send_conn_abort(struct rxrpc_connection *conn); void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb); void rxrpc_send_keepalive(struct rxrpc_peer *); void rxrpc_transmit_one(struct rxrpc_call *call, struct rxrpc_txbuf *txb); @@ -1063,12 +1143,11 @@ void rxrpc_peer_keepalive_worker(struct work_struct *); */ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *, const struct sockaddr_rxrpc *); -struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *, struct rxrpc_local *, - struct sockaddr_rxrpc *, gfp_t); +struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, + struct sockaddr_rxrpc *srx, gfp_t gfp); struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t, enum rxrpc_peer_trace); -void rxrpc_new_incoming_peer(struct rxrpc_sock *, struct rxrpc_local *, - struct rxrpc_peer *); +void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer); void rxrpc_destroy_all_peers(struct rxrpc_net *); struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *, enum rxrpc_peer_trace); struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *, enum rxrpc_peer_trace); @@ -1086,33 +1165,22 @@ extern const struct seq_operations rxrpc_local_seq_ops; * recvmsg.c */ void rxrpc_notify_socket(struct rxrpc_call *); -bool __rxrpc_set_call_completion(struct rxrpc_call *, enum rxrpc_call_completion, u32, int); -bool rxrpc_set_call_completion(struct rxrpc_call *, enum rxrpc_call_completion, u32, int); -bool __rxrpc_call_completed(struct rxrpc_call *); -bool rxrpc_call_completed(struct rxrpc_call *); -bool __rxrpc_abort_call(const char *, struct rxrpc_call *, 
rxrpc_seq_t, u32, int); -bool rxrpc_abort_call(const char *, struct rxrpc_call *, rxrpc_seq_t, u32, int); int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int); /* * Abort a call due to a protocol error. */ -static inline bool __rxrpc_abort_eproto(struct rxrpc_call *call, - struct sk_buff *skb, - const char *eproto_why, - const char *why, - u32 abort_code) +static inline int rxrpc_abort_eproto(struct rxrpc_call *call, + struct sk_buff *skb, + s32 abort_code, + enum rxrpc_abort_reason why) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - trace_rxrpc_rx_eproto(call, sp->hdr.serial, eproto_why); - return rxrpc_abort_call(why, call, sp->hdr.seq, abort_code, -EPROTO); + rxrpc_abort_call(call, sp->hdr.seq, abort_code, -EPROTO, why); + return -EPROTO; } -#define rxrpc_abort_eproto(call, skb, eproto_why, abort_why, abort_code) \ - __rxrpc_abort_eproto((call), (skb), tracepoint_string(eproto_why), \ - (abort_why), (abort_code)) - /* * rtt.c */ @@ -1144,6 +1212,8 @@ struct key *rxrpc_look_up_server_security(struct rxrpc_connection *, /* * sendmsg.c */ +bool rxrpc_propose_abort(struct rxrpc_call *call, s32 abort_code, int error, + enum rxrpc_abort_reason why); int rxrpc_do_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t); /* diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index c02401656fa9..3e8689fdc437 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -99,7 +99,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, if (!call) return -ENOMEM; call->flags |= (1 << RXRPC_CALL_IS_SERVICE); - call->state = RXRPC_CALL_SERVER_PREALLOC; + rxrpc_set_call_state(call, RXRPC_CALL_SERVER_PREALLOC); __set_bit(RXRPC_CALL_EV_INITIAL_PING, &call->events); trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), @@ -280,7 +280,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, (peer_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); - rxrpc_new_incoming_peer(rx, local, peer); + rxrpc_new_incoming_peer(local, 
peer); } /* Now allocate and set up the connection */ @@ -326,11 +326,11 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, * If we want to report an error, we mark the skb with the packet type and * abort code and return false. */ -int rxrpc_new_incoming_call(struct rxrpc_local *local, - struct rxrpc_peer *peer, - struct rxrpc_connection *conn, - struct sockaddr_rxrpc *peer_srx, - struct sk_buff *skb) +bool rxrpc_new_incoming_call(struct rxrpc_local *local, + struct rxrpc_peer *peer, + struct rxrpc_connection *conn, + struct sockaddr_rxrpc *peer_srx, + struct sk_buff *skb) { const struct rxrpc_security *sec = NULL; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); @@ -339,18 +339,17 @@ int rxrpc_new_incoming_call(struct rxrpc_local *local, _enter(""); - /* Don't set up a call for anything other than the first DATA packet. */ - if (sp->hdr.seq != 1 || - sp->hdr.type != RXRPC_PACKET_TYPE_DATA) - return 0; /* Just discard */ + /* Don't set up a call for anything other than a DATA packet. */ + if (sp->hdr.type != RXRPC_PACKET_TYPE_DATA) + return rxrpc_protocol_error(skb, rxrpc_eproto_no_service_call); - rcu_read_lock(); + read_lock(&local->services_lock); /* Weed out packets to services we're not offering. Packets that would * begin a call are explicitly rejected and the rest are just * discarded. 
*/ - rx = rcu_dereference(local->service); + rx = local->service; if (!rx || (sp->hdr.serviceId != rx->srx.srx_service && sp->hdr.serviceId != rx->second_service) ) { @@ -363,16 +362,14 @@ int rxrpc_new_incoming_call(struct rxrpc_local *local, if (!conn) { sec = rxrpc_get_incoming_security(rx, skb); if (!sec) - goto reject; + goto unsupported_security; } spin_lock(&rx->incoming_lock); if (rx->sk.sk_state == RXRPC_SERVER_LISTEN_DISABLED || rx->sk.sk_state == RXRPC_CLOSE) { - trace_rxrpc_abort(0, "CLS", sp->hdr.cid, sp->hdr.callNumber, - sp->hdr.seq, RX_INVALID_OPERATION, ESHUTDOWN); - skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; - skb->priority = RX_INVALID_OPERATION; + rxrpc_direct_abort(skb, rxrpc_abort_shut_down, + RX_INVALID_OPERATION, -ESHUTDOWN); goto no_call; } @@ -402,7 +399,7 @@ int rxrpc_new_incoming_call(struct rxrpc_local *local, spin_unlock(&conn->state_lock); spin_unlock(&rx->incoming_lock); - rcu_read_unlock(); + read_unlock(&local->services_lock); if (hlist_unhashed(&call->error_link)) { spin_lock(&call->peer->lock); @@ -413,22 +410,24 @@ int rxrpc_new_incoming_call(struct rxrpc_local *local, _leave(" = %p{%d}", call, call->debug_id); rxrpc_input_call_event(call, skb); rxrpc_put_call(call, rxrpc_call_put_input); - return 0; + return true; unsupported_service: - trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - RX_INVALID_OPERATION, EOPNOTSUPP); - skb->priority = RX_INVALID_OPERATION; - goto reject; + read_unlock(&local->services_lock); + return rxrpc_direct_abort(skb, rxrpc_abort_service_not_offered, + RX_INVALID_OPERATION, -EOPNOTSUPP); +unsupported_security: + read_unlock(&local->services_lock); + return rxrpc_direct_abort(skb, rxrpc_abort_service_not_offered, + RX_INVALID_OPERATION, -EKEYREJECTED); no_call: spin_unlock(&rx->incoming_lock); -reject: - rcu_read_unlock(); + read_unlock(&local->services_lock); _leave(" = f [%u]", skb->mark); - return -EPROTO; + return false; discard: - rcu_read_unlock(); - return 0; + 
read_unlock(&local->services_lock); + return true; } /* diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index b2cf448fb02c..1abdef15debc 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -251,6 +251,41 @@ out: _leave(""); } +/* + * Start transmitting the reply to a service. This cancels the need to ACK the + * request if we haven't yet done so. + */ +static void rxrpc_begin_service_reply(struct rxrpc_call *call) +{ + unsigned long now = jiffies; + + rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SEND_REPLY); + WRITE_ONCE(call->delay_ack_at, now + MAX_JIFFY_OFFSET); + if (call->ackr_reason == RXRPC_ACK_DELAY) + call->ackr_reason = 0; + trace_rxrpc_timer(call, rxrpc_timer_init_for_send_reply, now); +} + +/* + * Close the transmission phase. After this point there is no more data to be + * transmitted in the call. + */ +static void rxrpc_close_tx_phase(struct rxrpc_call *call) +{ + _debug("________awaiting reply/ACK__________"); + + switch (__rxrpc_call_state(call)) { + case RXRPC_CALL_CLIENT_SEND_REQUEST: + rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_AWAIT_REPLY); + break; + case RXRPC_CALL_SERVER_SEND_REPLY: + rxrpc_set_call_state(call, RXRPC_CALL_SERVER_AWAIT_ACK); + break; + default: + break; + } +} + static bool rxrpc_tx_window_has_space(struct rxrpc_call *call) { unsigned int winsize = min_t(unsigned int, call->tx_winsize, @@ -270,9 +305,11 @@ static void rxrpc_decant_prepared_tx(struct rxrpc_call *call) { struct rxrpc_txbuf *txb; - if (rxrpc_is_client_call(call) && - !test_bit(RXRPC_CALL_EXPOSED, &call->flags)) + if (!test_bit(RXRPC_CALL_EXPOSED, &call->flags)) { + if (list_empty(&call->tx_sendmsg)) + return; rxrpc_expose_client_call(call); + } while ((txb = list_first_entry_or_null(&call->tx_sendmsg, struct rxrpc_txbuf, call_link))) { @@ -283,6 +320,9 @@ static void rxrpc_decant_prepared_tx(struct rxrpc_call *call) call->tx_top = txb->seq; list_add_tail(&txb->call_link, &call->tx_buffer); + if (txb->wire.flags & RXRPC_LAST_PACKET) 
+ rxrpc_close_tx_phase(call); + rxrpc_transmit_one(call, txb); if (!rxrpc_tx_window_has_space(call)) @@ -292,16 +332,15 @@ static void rxrpc_decant_prepared_tx(struct rxrpc_call *call) static void rxrpc_transmit_some_data(struct rxrpc_call *call) { - switch (call->state) { + switch (__rxrpc_call_state(call)) { case RXRPC_CALL_SERVER_ACK_REQUEST: if (list_empty(&call->tx_sendmsg)) return; + rxrpc_begin_service_reply(call); fallthrough; case RXRPC_CALL_SERVER_SEND_REPLY: - case RXRPC_CALL_SERVER_AWAIT_ACK: case RXRPC_CALL_CLIENT_SEND_REQUEST: - case RXRPC_CALL_CLIENT_AWAIT_REPLY: if (!rxrpc_tx_window_has_space(call)) return; if (list_empty(&call->tx_sendmsg)) { @@ -331,21 +370,31 @@ static void rxrpc_send_initial_ping(struct rxrpc_call *call) /* * Handle retransmission and deferred ACK/abort generation. */ -void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) +bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) { unsigned long now, next, t; rxrpc_serial_t ackr_serial; bool resend = false, expired = false; + s32 abort_code; rxrpc_see_call(call, rxrpc_call_see_input); //printk("\n--------------------\n"); _enter("{%d,%s,%lx}", - call->debug_id, rxrpc_call_states[call->state], call->events); + call->debug_id, rxrpc_call_states[__rxrpc_call_state(call)], + call->events); - if (call->state == RXRPC_CALL_COMPLETE) + if (__rxrpc_call_is_complete(call)) goto out; + /* Handle abort request locklessly, vs rxrpc_propose_abort(). 
*/ + abort_code = smp_load_acquire(&call->send_abort); + if (abort_code) { + rxrpc_abort_call(call, 0, call->send_abort, call->send_abort_err, + call->send_abort_why); + goto out; + } + if (skb && skb->mark == RXRPC_SKB_MARK_ERROR) goto out; @@ -358,7 +407,7 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) } t = READ_ONCE(call->expect_req_by); - if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST && + if (__rxrpc_call_state(call) == RXRPC_CALL_SERVER_RECV_REQUEST && time_after_eq(now, t)) { trace_rxrpc_timer(call, rxrpc_timer_exp_idle, now); expired = true; @@ -429,11 +478,12 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) && (int)call->conn->hi_serial - (int)call->rx_serial > 0) { trace_rxrpc_call_reset(call); - rxrpc_abort_call("EXP", call, 0, RX_CALL_DEAD, -ECONNRESET); + rxrpc_abort_call(call, 0, RX_CALL_DEAD, -ECONNRESET, + rxrpc_abort_call_reset); } else { - rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME); + rxrpc_abort_call(call, 0, RX_CALL_TIMEOUT, -ETIME, + rxrpc_abort_call_timeout); } - rxrpc_send_abort_packet(call); goto out; } @@ -441,7 +491,7 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, rxrpc_propose_ack_ping_for_lost_ack); - if (resend && call->state != RXRPC_CALL_CLIENT_RECV_REPLY) + if (resend && __rxrpc_call_state(call) != RXRPC_CALL_CLIENT_RECV_REPLY) rxrpc_resend(call, NULL); if (test_and_clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags)) @@ -453,7 +503,7 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_propose_ack_input_data); /* Make sure the timer is restarted */ - if (call->state != RXRPC_CALL_COMPLETE) { + if (!__rxrpc_call_is_complete(call)) { next = call->expect_rx_by; #define set(T) { t = READ_ONCE(T); if (time_before(t, next)) next = t; } @@ -474,9 +524,15 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff 
*skb) } out: - if (call->state == RXRPC_CALL_COMPLETE) + if (__rxrpc_call_is_complete(call)) { del_timer_sync(&call->timer); + if (!test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) + rxrpc_disconnect_call(call); + if (call->security) + call->security->free_call_crypto(call); + } if (call->acks_hard_ack != call->tx_bottom) rxrpc_shrink_call_tx_buffer(call); _leave(""); + return true; } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 89dcf60b1158..3ded5a24627c 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -50,7 +50,7 @@ void rxrpc_poke_call(struct rxrpc_call *call, enum rxrpc_call_poke_trace what) struct rxrpc_local *local = call->local; bool busy; - if (call->state < RXRPC_CALL_COMPLETE) { + if (!test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) { spin_lock_bh(&local->lock); busy = !list_empty(&call->attend_link); trace_rxrpc_poke_call(call, busy, what); @@ -69,7 +69,7 @@ static void rxrpc_call_timer_expired(struct timer_list *t) _enter("%d", call->debug_id); - if (call->state < RXRPC_CALL_COMPLETE) { + if (!__rxrpc_call_is_complete(call)) { trace_rxrpc_timer_expired(call, jiffies); rxrpc_poke_call(call, rxrpc_call_poke_timer); } @@ -150,7 +150,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, timer_setup(&call->timer, rxrpc_call_timer_expired, 0); INIT_WORK(&call->destroyer, rxrpc_destroy_call); INIT_LIST_HEAD(&call->link); - INIT_LIST_HEAD(&call->chan_wait_link); + INIT_LIST_HEAD(&call->wait_link); INIT_LIST_HEAD(&call->accept_link); INIT_LIST_HEAD(&call->recvmsg_link); INIT_LIST_HEAD(&call->sock_link); @@ -162,7 +162,6 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, init_waitqueue_head(&call->waitq); spin_lock_init(&call->notify_lock); spin_lock_init(&call->tx_lock); - rwlock_init(&call->state_lock); refcount_set(&call->ref, 1); call->debug_id = debug_id; call->tx_total_len = -1; @@ -211,7 +210,6 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, 
now = ktime_get_real(); call->acks_latest_ts = now; call->cong_tstamp = now; - call->state = RXRPC_CALL_CLIENT_AWAIT_CONN; call->dest_srx = *srx; call->interruptibility = p->interruptibility; call->tx_total_len = p->tx_total_len; @@ -227,11 +225,13 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, ret = rxrpc_init_client_call_security(call); if (ret < 0) { - __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret); + rxrpc_prefail_call(call, RXRPC_CALL_LOCAL_ERROR, ret); rxrpc_put_call(call, rxrpc_call_put_discard_error); return ERR_PTR(ret); } + rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_AWAIT_CONN); + trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), p->user_call_ID, rxrpc_call_new_client); @@ -242,7 +242,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, /* * Initiate the call ack/resend/expiry timer. */ -static void rxrpc_start_call_timer(struct rxrpc_call *call) +void rxrpc_start_call_timer(struct rxrpc_call *call) { unsigned long now = jiffies; unsigned long j = now + MAX_JIFFY_OFFSET; @@ -287,6 +287,39 @@ static void rxrpc_put_call_slot(struct rxrpc_call *call) } /* + * Start the process of connecting a call. We obtain a peer and a connection + * bundle, but the actual association of a call with a connection is offloaded + * to the I/O thread to simplify locking. 
+ */ +static int rxrpc_connect_call(struct rxrpc_call *call, gfp_t gfp) +{ + struct rxrpc_local *local = call->local; + int ret = 0; + + _enter("{%d,%lx},", call->debug_id, call->user_call_ID); + + call->peer = rxrpc_lookup_peer(local, &call->dest_srx, gfp); + if (!call->peer) + goto error; + + ret = rxrpc_look_up_bundle(call, gfp); + if (ret < 0) + goto error; + + trace_rxrpc_client(NULL, -1, rxrpc_client_queue_new_call); + rxrpc_get_call(call, rxrpc_call_get_io_thread); + spin_lock(&local->client_call_lock); + list_add_tail(&call->wait_link, &local->new_client_calls); + spin_unlock(&local->client_call_lock); + rxrpc_wake_up_io_thread(local); + return 0; + +error: + __set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); + return ret; +} + +/* * Set up a call for the given parameters. * - Called with the socket lock held, which it must release. * - If it returns a call, the call's lock will need releasing by the caller. @@ -365,14 +398,10 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, /* Set up or get a connection record and set the protocol parameters, * including channel number and call ID. 
*/ - ret = rxrpc_connect_call(rx, call, cp, srx, gfp); + ret = rxrpc_connect_call(call, gfp); if (ret < 0) goto error_attached_to_socket; - rxrpc_see_call(call, rxrpc_call_see_connected); - - rxrpc_start_call_timer(call); - _leave(" = %p [new]", call); return call; @@ -384,27 +413,23 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, error_dup_user_ID: write_unlock(&rx->call_lock); release_sock(&rx->sk); - __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, - RX_CALL_DEAD, -EEXIST); + rxrpc_prefail_call(call, RXRPC_CALL_LOCAL_ERROR, -EEXIST); trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), 0, rxrpc_call_see_userid_exists); - rxrpc_release_call(rx, call); mutex_unlock(&call->user_mutex); rxrpc_put_call(call, rxrpc_call_put_userid_exists); _leave(" = -EEXIST"); return ERR_PTR(-EEXIST); /* We got an error, but the call is attached to the socket and is in - * need of release. However, we might now race with recvmsg() when - * completing the call queues it. Return 0 from sys_sendmsg() and + * need of release. However, we might now race with recvmsg() when it + * completion notifies the socket. Return 0 from sys_sendmsg() and * leave the error to recvmsg() to deal with. 
*/ error_attached_to_socket: trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), ret, rxrpc_call_see_connect_failed); - set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); - __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, - RX_CALL_DEAD, ret); + rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret); _leave(" = c=%08x [err]", call->debug_id); return call; } @@ -427,32 +452,32 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, call->call_id = sp->hdr.callNumber; call->dest_srx.srx_service = sp->hdr.serviceId; call->cid = sp->hdr.cid; - call->state = RXRPC_CALL_SERVER_SECURING; call->cong_tstamp = skb->tstamp; + __set_bit(RXRPC_CALL_EXPOSED, &call->flags); + rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING); + spin_lock(&conn->state_lock); switch (conn->state) { case RXRPC_CONN_SERVICE_UNSECURED: case RXRPC_CONN_SERVICE_CHALLENGING: - call->state = RXRPC_CALL_SERVER_SECURING; + rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING); break; case RXRPC_CONN_SERVICE: - call->state = RXRPC_CALL_SERVER_RECV_REQUEST; + rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST); break; - case RXRPC_CONN_REMOTELY_ABORTED: - __rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, - conn->abort_code, conn->error); - break; - case RXRPC_CONN_LOCALLY_ABORTED: - __rxrpc_abort_call("CON", call, 1, - conn->abort_code, conn->error); + case RXRPC_CONN_ABORTED: + rxrpc_set_call_completion(call, conn->completion, + conn->abort_code, conn->error); break; default: BUG(); } + rxrpc_get_call(call, rxrpc_call_get_io_thread); + /* Set the channel for this call. 
We don't get channel_lock as we're * only defending against the data_ready handler (which we're called * from) and the RESPONSE packet parser (which is only really @@ -462,7 +487,7 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, chan = sp->hdr.cid & RXRPC_CHANNELMASK; conn->channels[chan].call_counter = call->call_id; conn->channels[chan].call_id = call->call_id; - rcu_assign_pointer(conn->channels[chan].call, call); + conn->channels[chan].call = call; spin_unlock(&conn->state_lock); spin_lock(&conn->peer->lock); @@ -522,20 +547,17 @@ static void rxrpc_cleanup_ring(struct rxrpc_call *call) void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) { struct rxrpc_connection *conn = call->conn; - bool put = false; + bool put = false, putu = false; _enter("{%d,%d}", call->debug_id, refcount_read(&call->ref)); trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), call->flags, rxrpc_call_see_release); - ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); - if (test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags)) BUG(); rxrpc_put_call_slot(call); - del_timer_sync(&call->timer); /* Make sure we don't get any more notifications */ write_lock(&rx->recvmsg_lock); @@ -560,7 +582,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) if (test_and_clear_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { rb_erase(&call->sock_node, &rx->calls); memset(&call->sock_node, 0xdd, sizeof(call->sock_node)); - rxrpc_put_call(call, rxrpc_call_put_userid_exists); + putu = true; } list_del(&call->sock_link); @@ -568,10 +590,9 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn); - if (conn && !test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) - rxrpc_disconnect_call(call); - if (call->security) - call->security->free_call_crypto(call); + if (putu) + rxrpc_put_call(call, rxrpc_call_put_userid); + _leave(""); } @@ -588,7 +609,8 @@ void rxrpc_release_calls_on_socket(struct 
rxrpc_sock *rx) call = list_entry(rx->to_be_accepted.next, struct rxrpc_call, accept_link); list_del(&call->accept_link); - rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, -ECONNRESET); + rxrpc_propose_abort(call, RX_CALL_DEAD, -ECONNRESET, + rxrpc_abort_call_sock_release_tba); rxrpc_put_call(call, rxrpc_call_put_release_sock_tba); } @@ -596,8 +618,8 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) call = list_entry(rx->sock_calls.next, struct rxrpc_call, sock_link); rxrpc_get_call(call, rxrpc_call_get_release_sock); - rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, -ECONNRESET); - rxrpc_send_abort_packet(call); + rxrpc_propose_abort(call, RX_CALL_DEAD, -ECONNRESET, + rxrpc_abort_call_sock_release); rxrpc_release_call(rx, call); rxrpc_put_call(call, rxrpc_call_put_release_sock); } @@ -620,7 +642,7 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace why) dead = __refcount_dec_and_test(&call->ref, &r); trace_rxrpc_call(debug_id, r - 1, 0, why); if (dead) { - ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); + ASSERTCMP(__rxrpc_call_state(call), ==, RXRPC_CALL_COMPLETE); if (!list_empty(&call->link)) { spin_lock(&rxnet->call_lock); @@ -669,6 +691,8 @@ static void rxrpc_destroy_call(struct work_struct *work) rxrpc_put_txbuf(call->tx_pending, rxrpc_txbuf_put_cleaned); rxrpc_put_connection(call->conn, rxrpc_conn_put_call); + rxrpc_deactivate_bundle(call->bundle); + rxrpc_put_bundle(call->bundle, rxrpc_bundle_put_call); rxrpc_put_peer(call->peer, rxrpc_peer_put_call); rxrpc_put_local(call->local, rxrpc_local_put_call); call_rcu(&call->rcu, rxrpc_rcu_free_call); @@ -681,7 +705,7 @@ void rxrpc_cleanup_call(struct rxrpc_call *call) { memset(&call->sock_node, 0xcd, sizeof(call->sock_node)); - ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); + ASSERTCMP(__rxrpc_call_state(call), ==, RXRPC_CALL_COMPLETE); ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags)); del_timer(&call->timer); @@ -719,7 +743,7 @@ void rxrpc_destroy_all_calls(struct rxrpc_net 
*rxnet) pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n", call, refcount_read(&call->ref), - rxrpc_call_states[call->state], + rxrpc_call_states[__rxrpc_call_state(call)], call->flags, call->events); spin_unlock(&rxnet->call_lock); diff --git a/net/rxrpc/call_state.c b/net/rxrpc/call_state.c new file mode 100644 index 000000000000..6afb54373ebb --- /dev/null +++ b/net/rxrpc/call_state.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Call state changing functions. + * + * Copyright (C) 2022 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include "ar-internal.h" + +/* + * Transition a call to the complete state. + */ +bool rxrpc_set_call_completion(struct rxrpc_call *call, + enum rxrpc_call_completion compl, + u32 abort_code, + int error) +{ + if (__rxrpc_call_state(call) == RXRPC_CALL_COMPLETE) + return false; + + call->abort_code = abort_code; + call->error = error; + call->completion = compl; + /* Allow reader of completion state to operate locklessly */ + rxrpc_set_call_state(call, RXRPC_CALL_COMPLETE); + trace_rxrpc_call_complete(call); + wake_up(&call->waitq); + rxrpc_notify_socket(call); + return true; +} + +/* + * Record that a call successfully completed. + */ +bool rxrpc_call_completed(struct rxrpc_call *call) +{ + return rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0); +} + +/* + * Record that a call is locally aborted. + */ +bool rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq, + u32 abort_code, int error, enum rxrpc_abort_reason why) +{ + trace_rxrpc_abort(call->debug_id, why, call->cid, call->call_id, seq, + abort_code, error); + if (!rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED, + abort_code, error)) + return false; + if (test_bit(RXRPC_CALL_EXPOSED, &call->flags)) + rxrpc_send_abort_packet(call); + return true; +} + +/* + * Record that a call errored out before even getting off the ground, thereby + * setting the state to allow it to be destroyed. 
+ */ +void rxrpc_prefail_call(struct rxrpc_call *call, enum rxrpc_call_completion compl, + int error) +{ + call->abort_code = RX_CALL_DEAD; + call->error = error; + call->completion = compl; + call->_state = RXRPC_CALL_COMPLETE; + trace_rxrpc_call_complete(call); + WARN_ON_ONCE(__test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags)); +} diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 87efa0373aed..981ca5b98bcb 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -34,104 +34,59 @@ __read_mostly unsigned int rxrpc_reap_client_connections = 900; __read_mostly unsigned long rxrpc_conn_idle_client_expiry = 2 * 60 * HZ; __read_mostly unsigned long rxrpc_conn_idle_client_fast_expiry = 2 * HZ; -/* - * We use machine-unique IDs for our client connections. - */ -DEFINE_IDR(rxrpc_client_conn_ids); -static DEFINE_SPINLOCK(rxrpc_conn_id_lock); - -static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle); - -/* - * Get a connection ID and epoch for a client connection from the global pool. - * The connection struct pointer is then recorded in the idr radix tree. The - * epoch doesn't change until the client is rebooted (or, at least, unless the - * module is unloaded). 
- */ -static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn, - gfp_t gfp) +static void rxrpc_activate_bundle(struct rxrpc_bundle *bundle) { - struct rxrpc_net *rxnet = conn->rxnet; - int id; - - _enter(""); - - idr_preload(gfp); - spin_lock(&rxrpc_conn_id_lock); - - id = idr_alloc_cyclic(&rxrpc_client_conn_ids, conn, - 1, 0x40000000, GFP_NOWAIT); - if (id < 0) - goto error; - - spin_unlock(&rxrpc_conn_id_lock); - idr_preload_end(); - - conn->proto.epoch = rxnet->epoch; - conn->proto.cid = id << RXRPC_CIDSHIFT; - set_bit(RXRPC_CONN_HAS_IDR, &conn->flags); - _leave(" [CID %x]", conn->proto.cid); - return 0; - -error: - spin_unlock(&rxrpc_conn_id_lock); - idr_preload_end(); - _leave(" = %d", id); - return id; + atomic_inc(&bundle->active); } /* - * Release a connection ID for a client connection from the global pool. + * Release a connection ID for a client connection. */ -static void rxrpc_put_client_connection_id(struct rxrpc_connection *conn) +static void rxrpc_put_client_connection_id(struct rxrpc_local *local, + struct rxrpc_connection *conn) { - if (test_bit(RXRPC_CONN_HAS_IDR, &conn->flags)) { - spin_lock(&rxrpc_conn_id_lock); - idr_remove(&rxrpc_client_conn_ids, - conn->proto.cid >> RXRPC_CIDSHIFT); - spin_unlock(&rxrpc_conn_id_lock); - } + idr_remove(&local->conn_ids, conn->proto.cid >> RXRPC_CIDSHIFT); } /* * Destroy the client connection ID tree. */ -void rxrpc_destroy_client_conn_ids(void) +static void rxrpc_destroy_client_conn_ids(struct rxrpc_local *local) { struct rxrpc_connection *conn; int id; - if (!idr_is_empty(&rxrpc_client_conn_ids)) { - idr_for_each_entry(&rxrpc_client_conn_ids, conn, id) { + if (!idr_is_empty(&local->conn_ids)) { + idr_for_each_entry(&local->conn_ids, conn, id) { pr_err("AF_RXRPC: Leaked client conn %p {%d}\n", conn, refcount_read(&conn->ref)); } BUG(); } - idr_destroy(&rxrpc_client_conn_ids); + idr_destroy(&local->conn_ids); } /* * Allocate a connection bundle. 
*/ -static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_conn_parameters *cp, +static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_call *call, gfp_t gfp) { struct rxrpc_bundle *bundle; bundle = kzalloc(sizeof(*bundle), gfp); if (bundle) { - bundle->local = cp->local; - bundle->peer = rxrpc_get_peer(cp->peer, rxrpc_peer_get_bundle); - bundle->key = cp->key; - bundle->exclusive = cp->exclusive; - bundle->upgrade = cp->upgrade; - bundle->service_id = cp->service_id; - bundle->security_level = cp->security_level; + bundle->local = call->local; + bundle->peer = rxrpc_get_peer(call->peer, rxrpc_peer_get_bundle); + bundle->key = key_get(call->key); + bundle->security = call->security; + bundle->exclusive = test_bit(RXRPC_CALL_EXCLUSIVE, &call->flags); + bundle->upgrade = test_bit(RXRPC_CALL_UPGRADE, &call->flags); + bundle->service_id = call->dest_srx.srx_service; + bundle->security_level = call->security_level; refcount_set(&bundle->ref, 1); atomic_set(&bundle->active, 1); - spin_lock_init(&bundle->channel_lock); INIT_LIST_HEAD(&bundle->waiting_calls); trace_rxrpc_bundle(bundle->debug_id, 1, rxrpc_bundle_new); } @@ -152,84 +107,87 @@ static void rxrpc_free_bundle(struct rxrpc_bundle *bundle) { trace_rxrpc_bundle(bundle->debug_id, 1, rxrpc_bundle_free); rxrpc_put_peer(bundle->peer, rxrpc_peer_put_bundle); + key_put(bundle->key); kfree(bundle); } void rxrpc_put_bundle(struct rxrpc_bundle *bundle, enum rxrpc_bundle_trace why) { - unsigned int id = bundle->debug_id; + unsigned int id; bool dead; int r; - dead = __refcount_dec_and_test(&bundle->ref, &r); - trace_rxrpc_bundle(id, r - 1, why); - if (dead) - rxrpc_free_bundle(bundle); + if (bundle) { + id = bundle->debug_id; + dead = __refcount_dec_and_test(&bundle->ref, &r); + trace_rxrpc_bundle(id, r - 1, why); + if (dead) + rxrpc_free_bundle(bundle); + } +} + +/* + * Get rid of outstanding client connection preallocations when a local + * endpoint is destroyed. 
+ */ +void rxrpc_purge_client_connections(struct rxrpc_local *local) +{ + rxrpc_destroy_client_conn_ids(local); } /* * Allocate a client connection. */ static struct rxrpc_connection * -rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp) +rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle) { struct rxrpc_connection *conn; - struct rxrpc_net *rxnet = bundle->local->rxnet; - int ret; + struct rxrpc_local *local = bundle->local; + struct rxrpc_net *rxnet = local->rxnet; + int id; _enter(""); - conn = rxrpc_alloc_connection(rxnet, gfp); - if (!conn) { - _leave(" = -ENOMEM"); + conn = rxrpc_alloc_connection(rxnet, GFP_ATOMIC | __GFP_NOWARN); + if (!conn) return ERR_PTR(-ENOMEM); + + id = idr_alloc_cyclic(&local->conn_ids, conn, 1, 0x40000000, + GFP_ATOMIC | __GFP_NOWARN); + if (id < 0) { + kfree(conn); + return ERR_PTR(id); } refcount_set(&conn->ref, 1); - conn->bundle = bundle; - conn->local = bundle->local; - conn->peer = bundle->peer; - conn->key = bundle->key; + conn->proto.cid = id << RXRPC_CIDSHIFT; + conn->proto.epoch = local->rxnet->epoch; + conn->out_clientflag = RXRPC_CLIENT_INITIATED; + conn->bundle = rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_conn); + conn->local = rxrpc_get_local(bundle->local, rxrpc_local_get_client_conn); + conn->peer = rxrpc_get_peer(bundle->peer, rxrpc_peer_get_client_conn); + conn->key = key_get(bundle->key); + conn->security = bundle->security; conn->exclusive = bundle->exclusive; conn->upgrade = bundle->upgrade; conn->orig_service_id = bundle->service_id; conn->security_level = bundle->security_level; - conn->out_clientflag = RXRPC_CLIENT_INITIATED; - conn->state = RXRPC_CONN_CLIENT; + conn->state = RXRPC_CONN_CLIENT_UNSECURED; conn->service_id = conn->orig_service_id; - ret = rxrpc_get_client_connection_id(conn, gfp); - if (ret < 0) - goto error_0; - - ret = rxrpc_init_client_conn_security(conn); - if (ret < 0) - goto error_1; + if (conn->security == &rxrpc_no_security) + conn->state = 
RXRPC_CONN_CLIENT; atomic_inc(&rxnet->nr_conns); write_lock(&rxnet->conn_lock); list_add_tail(&conn->proc_link, &rxnet->conn_proc_list); write_unlock(&rxnet->conn_lock); - rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_conn); - rxrpc_get_peer(conn->peer, rxrpc_peer_get_client_conn); - rxrpc_get_local(conn->local, rxrpc_local_get_client_conn); - key_get(conn->key); - - trace_rxrpc_conn(conn->debug_id, refcount_read(&conn->ref), - rxrpc_conn_new_client); + rxrpc_see_connection(conn, rxrpc_conn_new_client); atomic_inc(&rxnet->nr_client_conns); trace_rxrpc_client(conn, -1, rxrpc_client_alloc); - _leave(" = %p", conn); return conn; - -error_1: - rxrpc_put_client_connection_id(conn); -error_0: - kfree(conn); - _leave(" = %d", ret); - return ERR_PTR(ret); } /* @@ -247,7 +205,8 @@ static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn) if (test_bit(RXRPC_CONN_DONT_REUSE, &conn->flags)) goto dont_reuse; - if (conn->state != RXRPC_CONN_CLIENT || + if ((conn->state != RXRPC_CONN_CLIENT_UNSECURED && + conn->state != RXRPC_CONN_CLIENT) || conn->proto.epoch != rxnet->epoch) goto mark_dont_reuse; @@ -257,7 +216,7 @@ static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn) * times the maximum number of client conns away from the current * allocation point to try and keep the IDs concentrated. */ - id_cursor = idr_get_cursor(&rxrpc_client_conn_ids); + id_cursor = idr_get_cursor(&conn->local->conn_ids); id = conn->proto.cid >> RXRPC_CIDSHIFT; distance = id - id_cursor; if (distance < 0) @@ -278,20 +237,23 @@ dont_reuse: * Look up the conn bundle that matches the connection parameters, adding it if * it doesn't yet exist. 
*/ -static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *cp, - gfp_t gfp) +int rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t gfp) { static atomic_t rxrpc_bundle_id; struct rxrpc_bundle *bundle, *candidate; - struct rxrpc_local *local = cp->local; + struct rxrpc_local *local = call->local; struct rb_node *p, **pp, *parent; long diff; + bool upgrade = test_bit(RXRPC_CALL_UPGRADE, &call->flags); _enter("{%px,%x,%u,%u}", - cp->peer, key_serial(cp->key), cp->security_level, cp->upgrade); + call->peer, key_serial(call->key), call->security_level, + upgrade); - if (cp->exclusive) - return rxrpc_alloc_bundle(cp, gfp); + if (test_bit(RXRPC_CALL_EXCLUSIVE, &call->flags)) { + call->bundle = rxrpc_alloc_bundle(call, gfp); + return call->bundle ? 0 : -ENOMEM; + } /* First, see if the bundle is already there. */ _debug("search 1"); @@ -300,11 +262,11 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c while (p) { bundle = rb_entry(p, struct rxrpc_bundle, local_node); -#define cmp(X) ((long)bundle->X - (long)cp->X) - diff = (cmp(peer) ?: - cmp(key) ?: - cmp(security_level) ?: - cmp(upgrade)); +#define cmp(X, Y) ((long)(X) - (long)(Y)) + diff = (cmp(bundle->peer, call->peer) ?: + cmp(bundle->key, call->key) ?: + cmp(bundle->security_level, call->security_level) ?: + cmp(bundle->upgrade, upgrade)); #undef cmp if (diff < 0) p = p->rb_left; @@ -317,9 +279,9 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c _debug("not found"); /* It wasn't. We need to add one. 
*/ - candidate = rxrpc_alloc_bundle(cp, gfp); + candidate = rxrpc_alloc_bundle(call, gfp); if (!candidate) - return NULL; + return -ENOMEM; _debug("search 2"); spin_lock(&local->client_bundles_lock); @@ -329,11 +291,11 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c parent = *pp; bundle = rb_entry(parent, struct rxrpc_bundle, local_node); -#define cmp(X) ((long)bundle->X - (long)cp->X) - diff = (cmp(peer) ?: - cmp(key) ?: - cmp(security_level) ?: - cmp(upgrade)); +#define cmp(X, Y) ((long)(X) - (long)(Y)) + diff = (cmp(bundle->peer, call->peer) ?: + cmp(bundle->key, call->key) ?: + cmp(bundle->security_level, call->security_level) ?: + cmp(bundle->upgrade, upgrade)); #undef cmp if (diff < 0) pp = &(*pp)->rb_left; @@ -347,178 +309,89 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c candidate->debug_id = atomic_inc_return(&rxrpc_bundle_id); rb_link_node(&candidate->local_node, parent, pp); rb_insert_color(&candidate->local_node, &local->client_bundles); - rxrpc_get_bundle(candidate, rxrpc_bundle_get_client_call); + call->bundle = rxrpc_get_bundle(candidate, rxrpc_bundle_get_client_call); spin_unlock(&local->client_bundles_lock); - _leave(" = %u [new]", candidate->debug_id); - return candidate; + _leave(" = B=%u [new]", call->bundle->debug_id); + return 0; found_bundle_free: rxrpc_free_bundle(candidate); found_bundle: - rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_call); - atomic_inc(&bundle->active); + call->bundle = rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_call); + rxrpc_activate_bundle(bundle); spin_unlock(&local->client_bundles_lock); - _leave(" = %u [found]", bundle->debug_id); - return bundle; -} - -/* - * Create or find a client bundle to use for a call. - * - * If we return with a connection, the call will be on its waiting list. It's - * left to the caller to assign a channel and wake up the call. 
- */ -static struct rxrpc_bundle *rxrpc_prep_call(struct rxrpc_sock *rx, - struct rxrpc_call *call, - struct rxrpc_conn_parameters *cp, - struct sockaddr_rxrpc *srx, - gfp_t gfp) -{ - struct rxrpc_bundle *bundle; - - _enter("{%d,%lx},", call->debug_id, call->user_call_ID); - - cp->peer = rxrpc_lookup_peer(rx, cp->local, srx, gfp); - if (!cp->peer) - goto error; - - call->tx_last_sent = ktime_get_real(); - call->cong_ssthresh = cp->peer->cong_ssthresh; - if (call->cong_cwnd >= call->cong_ssthresh) - call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; - else - call->cong_mode = RXRPC_CALL_SLOW_START; - if (cp->upgrade) - __set_bit(RXRPC_CALL_UPGRADE, &call->flags); - - /* Find the client connection bundle. */ - bundle = rxrpc_look_up_bundle(cp, gfp); - if (!bundle) - goto error; - - /* Get this call queued. Someone else may activate it whilst we're - * lining up a new connection, but that's fine. - */ - spin_lock(&bundle->channel_lock); - list_add_tail(&call->chan_wait_link, &bundle->waiting_calls); - spin_unlock(&bundle->channel_lock); - - _leave(" = [B=%x]", bundle->debug_id); - return bundle; - -error: - _leave(" = -ENOMEM"); - return ERR_PTR(-ENOMEM); + _leave(" = B=%u [found]", call->bundle->debug_id); + return 0; } /* * Allocate a new connection and add it into a bundle. 
*/ -static void rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle, gfp_t gfp) - __releases(bundle->channel_lock) +static bool rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle, + unsigned int slot) { - struct rxrpc_connection *candidate = NULL, *old = NULL; - bool conflict; - int i; - - _enter(""); - - conflict = bundle->alloc_conn; - if (!conflict) - bundle->alloc_conn = true; - spin_unlock(&bundle->channel_lock); - if (conflict) { - _leave(" [conf]"); - return; - } - - candidate = rxrpc_alloc_client_connection(bundle, gfp); - - spin_lock(&bundle->channel_lock); - bundle->alloc_conn = false; - - if (IS_ERR(candidate)) { - bundle->alloc_error = PTR_ERR(candidate); - spin_unlock(&bundle->channel_lock); - _leave(" [err %ld]", PTR_ERR(candidate)); - return; - } - - bundle->alloc_error = 0; - - for (i = 0; i < ARRAY_SIZE(bundle->conns); i++) { - unsigned int shift = i * RXRPC_MAXCALLS; - int j; - - old = bundle->conns[i]; - if (!rxrpc_may_reuse_conn(old)) { - if (old) - trace_rxrpc_client(old, -1, rxrpc_client_replace); - candidate->bundle_shift = shift; - atomic_inc(&bundle->active); - bundle->conns[i] = candidate; - for (j = 0; j < RXRPC_MAXCALLS; j++) - set_bit(shift + j, &bundle->avail_chans); - candidate = NULL; - break; - } + struct rxrpc_connection *conn, *old; + unsigned int shift = slot * RXRPC_MAXCALLS; + unsigned int i; - old = NULL; + old = bundle->conns[slot]; + if (old) { + bundle->conns[slot] = NULL; + trace_rxrpc_client(old, -1, rxrpc_client_replace); + rxrpc_put_connection(old, rxrpc_conn_put_noreuse); } - spin_unlock(&bundle->channel_lock); - - if (candidate) { - _debug("discard C=%x", candidate->debug_id); - trace_rxrpc_client(candidate, -1, rxrpc_client_duplicate); - rxrpc_put_connection(candidate, rxrpc_conn_put_discard); + conn = rxrpc_alloc_client_connection(bundle); + if (IS_ERR(conn)) { + bundle->alloc_error = PTR_ERR(conn); + return false; } - rxrpc_put_connection(old, rxrpc_conn_put_noreuse); - _leave(""); + 
rxrpc_activate_bundle(bundle); + conn->bundle_shift = shift; + bundle->conns[slot] = conn; + for (i = 0; i < RXRPC_MAXCALLS; i++) + set_bit(shift + i, &bundle->avail_chans); + return true; } /* * Add a connection to a bundle if there are no usable connections or we have * connections waiting for extra capacity. */ -static void rxrpc_maybe_add_conn(struct rxrpc_bundle *bundle, gfp_t gfp) +static bool rxrpc_bundle_has_space(struct rxrpc_bundle *bundle) { - struct rxrpc_call *call; - int i, usable; + int slot = -1, i, usable; _enter(""); - spin_lock(&bundle->channel_lock); + bundle->alloc_error = 0; /* See if there are any usable connections. */ usable = 0; - for (i = 0; i < ARRAY_SIZE(bundle->conns); i++) + for (i = 0; i < ARRAY_SIZE(bundle->conns); i++) { if (rxrpc_may_reuse_conn(bundle->conns[i])) usable++; - - if (!usable && !list_empty(&bundle->waiting_calls)) { - call = list_first_entry(&bundle->waiting_calls, - struct rxrpc_call, chan_wait_link); - if (test_bit(RXRPC_CALL_UPGRADE, &call->flags)) - bundle->try_upgrade = true; + else if (slot == -1) + slot = i; } + if (!usable && bundle->upgrade) + bundle->try_upgrade = true; + if (!usable) goto alloc_conn; if (!bundle->avail_chans && !bundle->try_upgrade && - !list_empty(&bundle->waiting_calls) && usable < ARRAY_SIZE(bundle->conns)) goto alloc_conn; - spin_unlock(&bundle->channel_lock); _leave(""); - return; + return usable; alloc_conn: - return rxrpc_add_conn_to_bundle(bundle, gfp); + return slot >= 0 ? 
rxrpc_add_conn_to_bundle(bundle, slot) : false; } /* @@ -532,11 +405,13 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, struct rxrpc_channel *chan = &conn->channels[channel]; struct rxrpc_bundle *bundle = conn->bundle; struct rxrpc_call *call = list_entry(bundle->waiting_calls.next, - struct rxrpc_call, chan_wait_link); + struct rxrpc_call, wait_link); u32 call_id = chan->call_counter + 1; _enter("C=%x,%u", conn->debug_id, channel); + list_del_init(&call->wait_link); + trace_rxrpc_client(conn, channel, rxrpc_client_chan_activate); /* Cancel the final ACK on the previous call if it hasn't been sent yet @@ -546,68 +421,50 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, clear_bit(conn->bundle_shift + channel, &bundle->avail_chans); rxrpc_see_call(call, rxrpc_call_see_activate_client); - list_del_init(&call->chan_wait_link); - call->peer = rxrpc_get_peer(conn->peer, rxrpc_peer_get_activate_call); call->conn = rxrpc_get_connection(conn, rxrpc_conn_get_activate_call); call->cid = conn->proto.cid | channel; call->call_id = call_id; call->dest_srx.srx_service = conn->service_id; - - trace_rxrpc_connect_call(call); - - write_lock(&call->state_lock); - call->state = RXRPC_CALL_CLIENT_SEND_REQUEST; - write_unlock(&call->state_lock); - - /* Paired with the read barrier in rxrpc_connect_call(). This orders - * cid and epoch in the connection wrt to call_id without the need to - * take the channel_lock. - * - * We provisionally assign a callNumber at this point, but we don't - * confirm it until the call is about to be exposed. - * - * TODO: Pair with a barrier in the data_ready handler when that looks - * at the call ID through a connection channel. 
- */ - smp_wmb(); + call->cong_ssthresh = call->peer->cong_ssthresh; + if (call->cong_cwnd >= call->cong_ssthresh) + call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; + else + call->cong_mode = RXRPC_CALL_SLOW_START; chan->call_id = call_id; chan->call_debug_id = call->debug_id; - rcu_assign_pointer(chan->call, call); + chan->call = call; + + rxrpc_see_call(call, rxrpc_call_see_connected); + trace_rxrpc_connect_call(call); + call->tx_last_sent = ktime_get_real(); + rxrpc_start_call_timer(call); + rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_SEND_REQUEST); wake_up(&call->waitq); } /* * Remove a connection from the idle list if it's on it. */ -static void rxrpc_unidle_conn(struct rxrpc_bundle *bundle, struct rxrpc_connection *conn) +static void rxrpc_unidle_conn(struct rxrpc_connection *conn) { - struct rxrpc_net *rxnet = bundle->local->rxnet; - bool drop_ref; - if (!list_empty(&conn->cache_link)) { - drop_ref = false; - spin_lock(&rxnet->client_conn_cache_lock); - if (!list_empty(&conn->cache_link)) { - list_del_init(&conn->cache_link); - drop_ref = true; - } - spin_unlock(&rxnet->client_conn_cache_lock); - if (drop_ref) - rxrpc_put_connection(conn, rxrpc_conn_put_unidle); + list_del_init(&conn->cache_link); + rxrpc_put_connection(conn, rxrpc_conn_put_unidle); } } /* - * Assign channels and callNumbers to waiting calls with channel_lock - * held by caller. + * Assign channels and callNumbers to waiting calls. 
*/ -static void rxrpc_activate_channels_locked(struct rxrpc_bundle *bundle) +static void rxrpc_activate_channels(struct rxrpc_bundle *bundle) { struct rxrpc_connection *conn; unsigned long avail, mask; unsigned int channel, slot; + trace_rxrpc_client(NULL, -1, rxrpc_client_activate_chans); + if (bundle->try_upgrade) mask = 1; else @@ -627,7 +484,7 @@ static void rxrpc_activate_channels_locked(struct rxrpc_bundle *bundle) if (bundle->try_upgrade) set_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags); - rxrpc_unidle_conn(bundle, conn); + rxrpc_unidle_conn(conn); channel &= (RXRPC_MAXCALLS - 1); conn->act_chans |= 1 << channel; @@ -636,132 +493,24 @@ static void rxrpc_activate_channels_locked(struct rxrpc_bundle *bundle) } /* - * Assign channels and callNumbers to waiting calls. - */ -static void rxrpc_activate_channels(struct rxrpc_bundle *bundle) -{ - _enter("B=%x", bundle->debug_id); - - trace_rxrpc_client(NULL, -1, rxrpc_client_activate_chans); - - if (!bundle->avail_chans) - return; - - spin_lock(&bundle->channel_lock); - rxrpc_activate_channels_locked(bundle); - spin_unlock(&bundle->channel_lock); - _leave(""); -} - -/* - * Wait for a callNumber and a channel to be granted to a call. 
- */ -static int rxrpc_wait_for_channel(struct rxrpc_bundle *bundle, - struct rxrpc_call *call, gfp_t gfp) -{ - DECLARE_WAITQUEUE(myself, current); - int ret = 0; - - _enter("%d", call->debug_id); - - if (!gfpflags_allow_blocking(gfp)) { - rxrpc_maybe_add_conn(bundle, gfp); - rxrpc_activate_channels(bundle); - ret = bundle->alloc_error ?: -EAGAIN; - goto out; - } - - add_wait_queue_exclusive(&call->waitq, &myself); - for (;;) { - rxrpc_maybe_add_conn(bundle, gfp); - rxrpc_activate_channels(bundle); - ret = bundle->alloc_error; - if (ret < 0) - break; - - switch (call->interruptibility) { - case RXRPC_INTERRUPTIBLE: - case RXRPC_PREINTERRUPTIBLE: - set_current_state(TASK_INTERRUPTIBLE); - break; - case RXRPC_UNINTERRUPTIBLE: - default: - set_current_state(TASK_UNINTERRUPTIBLE); - break; - } - if (READ_ONCE(call->state) != RXRPC_CALL_CLIENT_AWAIT_CONN) - break; - if ((call->interruptibility == RXRPC_INTERRUPTIBLE || - call->interruptibility == RXRPC_PREINTERRUPTIBLE) && - signal_pending(current)) { - ret = -ERESTARTSYS; - break; - } - schedule(); - } - remove_wait_queue(&call->waitq, &myself); - __set_current_state(TASK_RUNNING); - -out: - _leave(" = %d", ret); - return ret; -} - -/* - * find a connection for a call - * - called in process context with IRQs enabled + * Connect waiting channels (called from the I/O thread). 
*/ -int rxrpc_connect_call(struct rxrpc_sock *rx, - struct rxrpc_call *call, - struct rxrpc_conn_parameters *cp, - struct sockaddr_rxrpc *srx, - gfp_t gfp) +void rxrpc_connect_client_calls(struct rxrpc_local *local) { - struct rxrpc_bundle *bundle; - struct rxrpc_net *rxnet = cp->local->rxnet; - int ret = 0; - - _enter("{%d,%lx},", call->debug_id, call->user_call_ID); - - rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper); + struct rxrpc_call *call; - bundle = rxrpc_prep_call(rx, call, cp, srx, gfp); - if (IS_ERR(bundle)) { - ret = PTR_ERR(bundle); - goto out; - } + while ((call = list_first_entry_or_null(&local->new_client_calls, + struct rxrpc_call, wait_link)) + ) { + struct rxrpc_bundle *bundle = call->bundle; - if (call->state == RXRPC_CALL_CLIENT_AWAIT_CONN) { - ret = rxrpc_wait_for_channel(bundle, call, gfp); - if (ret < 0) - goto wait_failed; - } + spin_lock(&local->client_call_lock); + list_move_tail(&call->wait_link, &bundle->waiting_calls); + spin_unlock(&local->client_call_lock); -granted_channel: - /* Paired with the write barrier in rxrpc_activate_one_channel(). */ - smp_rmb(); - -out_put_bundle: - rxrpc_deactivate_bundle(bundle); - rxrpc_put_bundle(bundle, rxrpc_bundle_get_client_call); -out: - _leave(" = %d", ret); - return ret; - -wait_failed: - spin_lock(&bundle->channel_lock); - list_del_init(&call->chan_wait_link); - spin_unlock(&bundle->channel_lock); - - if (call->state != RXRPC_CALL_CLIENT_AWAIT_CONN) { - ret = 0; - goto granted_channel; + if (rxrpc_bundle_has_space(bundle)) + rxrpc_activate_channels(bundle); } - - trace_rxrpc_client(call->conn, ret, rxrpc_client_chan_wait_failed); - rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret); - rxrpc_disconnect_client_call(bundle, call); - goto out_put_bundle; } /* @@ -794,14 +543,14 @@ void rxrpc_expose_client_call(struct rxrpc_call *call) /* * Set the reap timer. 
*/ -static void rxrpc_set_client_reap_timer(struct rxrpc_net *rxnet) +static void rxrpc_set_client_reap_timer(struct rxrpc_local *local) { - if (!rxnet->kill_all_client_conns) { + if (!local->kill_all_client_conns) { unsigned long now = jiffies; unsigned long reap_at = now + rxrpc_conn_idle_client_expiry; - if (rxnet->live) - timer_reduce(&rxnet->client_conn_reap_timer, reap_at); + if (local->rxnet->live) + timer_reduce(&local->client_conn_reap_timer, reap_at); } } @@ -812,16 +561,13 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call { struct rxrpc_connection *conn; struct rxrpc_channel *chan = NULL; - struct rxrpc_net *rxnet = bundle->local->rxnet; + struct rxrpc_local *local = bundle->local; unsigned int channel; bool may_reuse; u32 cid; _enter("c=%x", call->debug_id); - spin_lock(&bundle->channel_lock); - set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); - /* Calls that have never actually been assigned a channel can simply be * discarded. */ @@ -830,8 +576,8 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call _debug("call is waiting"); ASSERTCMP(call->call_id, ==, 0); ASSERT(!test_bit(RXRPC_CALL_EXPOSED, &call->flags)); - list_del_init(&call->chan_wait_link); - goto out; + list_del_init(&call->wait_link); + return; } cid = call->cid; @@ -839,10 +585,8 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call chan = &conn->channels[channel]; trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect); - if (rcu_access_pointer(chan->call) != call) { - spin_unlock(&bundle->channel_lock); - BUG(); - } + if (WARN_ON(chan->call != call)) + return; may_reuse = rxrpc_may_reuse_conn(conn); @@ -863,16 +607,15 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call trace_rxrpc_client(conn, channel, rxrpc_client_to_active); bundle->try_upgrade = false; if (may_reuse) - rxrpc_activate_channels_locked(bundle); + rxrpc_activate_channels(bundle); } - } /* 
See if we can pass the channel directly to another call. */ if (may_reuse && !list_empty(&bundle->waiting_calls)) { trace_rxrpc_client(conn, channel, rxrpc_client_chan_pass); rxrpc_activate_one_channel(conn, channel); - goto out; + return; } /* Schedule the final ACK to be transmitted in a short while so that it @@ -890,7 +633,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call } /* Deactivate the channel. */ - rcu_assign_pointer(chan->call, NULL); + chan->call = NULL; set_bit(conn->bundle_shift + channel, &conn->bundle->avail_chans); conn->act_chans &= ~(1 << channel); @@ -903,17 +646,10 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call conn->idle_timestamp = jiffies; rxrpc_get_connection(conn, rxrpc_conn_get_idle); - spin_lock(&rxnet->client_conn_cache_lock); - list_move_tail(&conn->cache_link, &rxnet->idle_client_conns); - spin_unlock(&rxnet->client_conn_cache_lock); + list_move_tail(&conn->cache_link, &local->idle_client_conns); - rxrpc_set_client_reap_timer(rxnet); + rxrpc_set_client_reap_timer(local); } - -out: - spin_unlock(&bundle->channel_lock); - _leave(""); - return; } /* @@ -923,7 +659,6 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) { struct rxrpc_bundle *bundle = conn->bundle; unsigned int bindex; - bool need_drop = false; int i; _enter("C=%x", conn->debug_id); @@ -931,18 +666,13 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK) rxrpc_process_delayed_final_acks(conn, true); - spin_lock(&bundle->channel_lock); bindex = conn->bundle_shift / RXRPC_MAXCALLS; if (bundle->conns[bindex] == conn) { _debug("clear slot %u", bindex); bundle->conns[bindex] = NULL; for (i = 0; i < RXRPC_MAXCALLS; i++) clear_bit(conn->bundle_shift + i, &bundle->avail_chans); - need_drop = true; - } - spin_unlock(&bundle->channel_lock); - - if (need_drop) { + rxrpc_put_client_connection_id(bundle->local, conn); 
rxrpc_deactivate_bundle(bundle); rxrpc_put_connection(conn, rxrpc_conn_put_unbundle); } @@ -951,11 +681,15 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) /* * Drop the active count on a bundle. */ -static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle) +void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle) { - struct rxrpc_local *local = bundle->local; + struct rxrpc_local *local; bool need_put = false; + if (!bundle) + return; + + local = bundle->local; if (atomic_dec_and_lock(&bundle->active, &local->client_bundles_lock)) { if (!bundle->exclusive) { _debug("erase bundle"); @@ -982,7 +716,7 @@ void rxrpc_kill_client_conn(struct rxrpc_connection *conn) trace_rxrpc_client(conn, -1, rxrpc_client_cleanup); atomic_dec(&rxnet->nr_client_conns); - rxrpc_put_client_connection_id(conn); + rxrpc_put_client_connection_id(local, conn); } /* @@ -992,42 +726,26 @@ void rxrpc_kill_client_conn(struct rxrpc_connection *conn) * This may be called from conn setup or from a work item so cannot be * considered non-reentrant. */ -void rxrpc_discard_expired_client_conns(struct work_struct *work) +void rxrpc_discard_expired_client_conns(struct rxrpc_local *local) { struct rxrpc_connection *conn; - struct rxrpc_net *rxnet = - container_of(work, struct rxrpc_net, client_conn_reaper); unsigned long expiry, conn_expires_at, now; unsigned int nr_conns; _enter(""); - if (list_empty(&rxnet->idle_client_conns)) { - _leave(" [empty]"); - return; - } - - /* Don't double up on the discarding */ - if (!mutex_trylock(&rxnet->client_conn_discard_lock)) { - _leave(" [already]"); - return; - } - /* We keep an estimate of what the number of conns ought to be after * we've discarded some so that we don't overdo the discarding. 
*/ - nr_conns = atomic_read(&rxnet->nr_client_conns); + nr_conns = atomic_read(&local->rxnet->nr_client_conns); next: - spin_lock(&rxnet->client_conn_cache_lock); - - if (list_empty(&rxnet->idle_client_conns)) - goto out; - - conn = list_entry(rxnet->idle_client_conns.next, - struct rxrpc_connection, cache_link); + conn = list_first_entry_or_null(&local->idle_client_conns, + struct rxrpc_connection, cache_link); + if (!conn) + return; - if (!rxnet->kill_all_client_conns) { + if (!local->kill_all_client_conns) { /* If the number of connections is over the reap limit, we * expedite discard by reducing the expiry timeout. We must, * however, have at least a short grace period to be able to do @@ -1050,8 +768,6 @@ next: trace_rxrpc_client(conn, -1, rxrpc_client_discard); list_del_init(&conn->cache_link); - spin_unlock(&rxnet->client_conn_cache_lock); - rxrpc_unbundle_conn(conn); /* Drop the ->cache_link ref */ rxrpc_put_connection(conn, rxrpc_conn_put_discard_idle); @@ -1068,31 +784,8 @@ not_yet_expired: * then things get messier. 
*/ _debug("not yet"); - if (!rxnet->kill_all_client_conns) - timer_reduce(&rxnet->client_conn_reap_timer, conn_expires_at); - -out: - spin_unlock(&rxnet->client_conn_cache_lock); - mutex_unlock(&rxnet->client_conn_discard_lock); - _leave(""); -} - -/* - * Preemptively destroy all the client connection records rather than waiting - * for them to time out - */ -void rxrpc_destroy_all_client_connections(struct rxrpc_net *rxnet) -{ - _enter(""); - - spin_lock(&rxnet->client_conn_cache_lock); - rxnet->kill_all_client_conns = true; - spin_unlock(&rxnet->client_conn_cache_lock); - - del_timer_sync(&rxnet->client_conn_reap_timer); - - if (!rxrpc_queue_work(&rxnet->client_conn_reaper)) - _debug("destroy: queue failed"); + if (!local->kill_all_client_conns) + timer_reduce(&local->client_conn_reap_timer, conn_expires_at); _leave(""); } @@ -1102,29 +795,19 @@ void rxrpc_destroy_all_client_connections(struct rxrpc_net *rxnet) */ void rxrpc_clean_up_local_conns(struct rxrpc_local *local) { - struct rxrpc_connection *conn, *tmp; - struct rxrpc_net *rxnet = local->rxnet; - LIST_HEAD(graveyard); + struct rxrpc_connection *conn; _enter(""); - spin_lock(&rxnet->client_conn_cache_lock); - - list_for_each_entry_safe(conn, tmp, &rxnet->idle_client_conns, - cache_link) { - if (conn->local == local) { - atomic_dec(&conn->active); - trace_rxrpc_client(conn, -1, rxrpc_client_discard); - list_move(&conn->cache_link, &graveyard); - } - } + local->kill_all_client_conns = true; - spin_unlock(&rxnet->client_conn_cache_lock); + del_timer_sync(&local->client_conn_reap_timer); - while (!list_empty(&graveyard)) { - conn = list_entry(graveyard.next, - struct rxrpc_connection, cache_link); + while ((conn = list_first_entry_or_null(&local->idle_client_conns, + struct rxrpc_connection, cache_link))) { list_del_init(&conn->cache_link); + atomic_dec(&conn->active); + trace_rxrpc_client(conn, -1, rxrpc_client_discard); rxrpc_unbundle_conn(conn); rxrpc_put_connection(conn, rxrpc_conn_put_local_dead); } diff 
--git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 480364bcbf85..44414e724415 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -17,11 +17,65 @@ #include "ar-internal.h" /* + * Set the completion state on an aborted connection. + */ +static bool rxrpc_set_conn_aborted(struct rxrpc_connection *conn, struct sk_buff *skb, + s32 abort_code, int err, + enum rxrpc_call_completion compl) +{ + bool aborted = false; + + if (conn->state != RXRPC_CONN_ABORTED) { + spin_lock(&conn->state_lock); + if (conn->state != RXRPC_CONN_ABORTED) { + conn->abort_code = abort_code; + conn->error = err; + conn->completion = compl; + /* Order the abort info before the state change. */ + smp_store_release(&conn->state, RXRPC_CONN_ABORTED); + set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); + set_bit(RXRPC_CONN_EV_ABORT_CALLS, &conn->events); + aborted = true; + } + spin_unlock(&conn->state_lock); + } + + return aborted; +} + +/* + * Mark a socket buffer to indicate that the connection it's on should be aborted. + */ +int rxrpc_abort_conn(struct rxrpc_connection *conn, struct sk_buff *skb, + s32 abort_code, int err, enum rxrpc_abort_reason why) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + if (rxrpc_set_conn_aborted(conn, skb, abort_code, err, + RXRPC_CALL_LOCALLY_ABORTED)) { + trace_rxrpc_abort(0, why, sp->hdr.cid, sp->hdr.callNumber, + sp->hdr.seq, abort_code, err); + rxrpc_poke_conn(conn, rxrpc_conn_get_poke_abort); + } + return -EPROTO; +} + +/* + * Mark a connection as being remotely aborted. + */ +static bool rxrpc_input_conn_abort(struct rxrpc_connection *conn, + struct sk_buff *skb) +{ + return rxrpc_set_conn_aborted(conn, skb, skb->priority, -ECONNABORTED, + RXRPC_CALL_REMOTELY_ABORTED); +} + +/* * Retransmit terminal ACK or ABORT of the previous call. 
*/ -static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, - struct sk_buff *skb, - unsigned int channel) +void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, + struct sk_buff *skb, + unsigned int channel) { struct rxrpc_skb_priv *sp = skb ? rxrpc_skb(skb) : NULL; struct rxrpc_channel *chan; @@ -46,9 +100,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, /* If the last call got moved on whilst we were waiting to run, just * ignore this packet. */ - call_id = READ_ONCE(chan->last_call); - /* Sync with __rxrpc_disconnect_call() */ - smp_rmb(); + call_id = chan->last_call; if (skb && call_id != sp->hdr.callNumber) return; @@ -65,9 +117,12 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, iov[2].iov_base = &ack_info; iov[2].iov_len = sizeof(ack_info); + serial = atomic_inc_return(&conn->serial); + pkt.whdr.epoch = htonl(conn->proto.epoch); pkt.whdr.cid = htonl(conn->proto.cid | channel); pkt.whdr.callNumber = htonl(call_id); + pkt.whdr.serial = htonl(serial); pkt.whdr.seq = 0; pkt.whdr.type = chan->last_type; pkt.whdr.flags = conn->out_clientflag; @@ -104,31 +159,15 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, iov[0].iov_len += sizeof(pkt.ack); len += sizeof(pkt.ack) + 3 + sizeof(ack_info); ioc = 3; - break; - - default: - return; - } - - /* Resync with __rxrpc_disconnect_call() and check that the last call - * didn't get advanced whilst we were filling out the packets. 
- */ - smp_rmb(); - if (READ_ONCE(chan->last_call) != call_id) - return; - - serial = atomic_inc_return(&conn->serial); - pkt.whdr.serial = htonl(serial); - switch (chan->last_type) { - case RXRPC_PACKET_TYPE_ABORT: - break; - case RXRPC_PACKET_TYPE_ACK: trace_rxrpc_tx_ack(chan->call_debug_id, serial, ntohl(pkt.ack.firstPacket), ntohl(pkt.ack.serial), pkt.ack.reason, 0); break; + + default: + return; } ret = kernel_sendmsg(conn->local->socket, &msg, iov, ioc, len); @@ -146,131 +185,34 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, /* * pass a connection-level abort onto all calls on that connection */ -static void rxrpc_abort_calls(struct rxrpc_connection *conn, - enum rxrpc_call_completion compl, - rxrpc_serial_t serial) +static void rxrpc_abort_calls(struct rxrpc_connection *conn) { struct rxrpc_call *call; int i; _enter("{%d},%x", conn->debug_id, conn->abort_code); - spin_lock(&conn->bundle->channel_lock); - for (i = 0; i < RXRPC_MAXCALLS; i++) { - call = rcu_dereference_protected( - conn->channels[i].call, - lockdep_is_held(&conn->bundle->channel_lock)); - if (call) { - if (compl == RXRPC_CALL_LOCALLY_ABORTED) - trace_rxrpc_abort(call->debug_id, - "CON", call->cid, - call->call_id, 0, + call = conn->channels[i].call; + if (call) + rxrpc_set_call_completion(call, + conn->completion, conn->abort_code, conn->error); - else - trace_rxrpc_rx_abort(call, serial, - conn->abort_code); - rxrpc_set_call_completion(call, compl, - conn->abort_code, - conn->error); - } } - spin_unlock(&conn->bundle->channel_lock); _leave(""); } /* - * generate a connection-level abort - */ -static int rxrpc_abort_connection(struct rxrpc_connection *conn, - int error, u32 abort_code) -{ - struct rxrpc_wire_header whdr; - struct msghdr msg; - struct kvec iov[2]; - __be32 word; - size_t len; - u32 serial; - int ret; - - _enter("%d,,%u,%u", conn->debug_id, error, abort_code); - - /* generate a connection-level abort */ - spin_lock(&conn->state_lock); - if (conn->state 
>= RXRPC_CONN_REMOTELY_ABORTED) { - spin_unlock(&conn->state_lock); - _leave(" = 0 [already dead]"); - return 0; - } - - conn->error = error; - conn->abort_code = abort_code; - conn->state = RXRPC_CONN_LOCALLY_ABORTED; - set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); - spin_unlock(&conn->state_lock); - - msg.msg_name = &conn->peer->srx.transport; - msg.msg_namelen = conn->peer->srx.transport_len; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_flags = 0; - - whdr.epoch = htonl(conn->proto.epoch); - whdr.cid = htonl(conn->proto.cid); - whdr.callNumber = 0; - whdr.seq = 0; - whdr.type = RXRPC_PACKET_TYPE_ABORT; - whdr.flags = conn->out_clientflag; - whdr.userStatus = 0; - whdr.securityIndex = conn->security_ix; - whdr._rsvd = 0; - whdr.serviceId = htons(conn->service_id); - - word = htonl(conn->abort_code); - - iov[0].iov_base = &whdr; - iov[0].iov_len = sizeof(whdr); - iov[1].iov_base = &word; - iov[1].iov_len = sizeof(word); - - len = iov[0].iov_len + iov[1].iov_len; - - serial = atomic_inc_return(&conn->serial); - rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, serial); - whdr.serial = htonl(serial); - - ret = kernel_sendmsg(conn->local->socket, &msg, iov, 2, len); - if (ret < 0) { - trace_rxrpc_tx_fail(conn->debug_id, serial, ret, - rxrpc_tx_point_conn_abort); - _debug("sendmsg failed: %d", ret); - return -EAGAIN; - } - - trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_conn_abort); - - conn->peer->last_tx_at = ktime_get_seconds(); - - _leave(" = 0"); - return 0; -} - -/* * mark a call as being on a now-secured channel * - must be called with BH's disabled. 
*/ static void rxrpc_call_is_secure(struct rxrpc_call *call) { - _enter("%p", call); - if (call) { - write_lock(&call->state_lock); - if (call->state == RXRPC_CALL_SERVER_SECURING) { - call->state = RXRPC_CALL_SERVER_RECV_REQUEST; - rxrpc_notify_socket(call); - } - write_unlock(&call->state_lock); + if (call && __rxrpc_call_state(call) == RXRPC_CALL_SERVER_SECURING) { + rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST); + rxrpc_notify_socket(call); } } @@ -278,44 +220,22 @@ static void rxrpc_call_is_secure(struct rxrpc_call *call) * connection-level Rx packet processor */ static int rxrpc_process_event(struct rxrpc_connection *conn, - struct sk_buff *skb, - u32 *_abort_code) + struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - int loop, ret; + int ret; - if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) { - _leave(" = -ECONNABORTED [%u]", conn->state); + if (conn->state == RXRPC_CONN_ABORTED) return -ECONNABORTED; - } _enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial); switch (sp->hdr.type) { - case RXRPC_PACKET_TYPE_DATA: - case RXRPC_PACKET_TYPE_ACK: - rxrpc_conn_retransmit_call(conn, skb, - sp->hdr.cid & RXRPC_CHANNELMASK); - return 0; - - case RXRPC_PACKET_TYPE_BUSY: - /* Just ignore BUSY packets for now. 
*/ - return 0; - - case RXRPC_PACKET_TYPE_ABORT: - conn->error = -ECONNABORTED; - conn->abort_code = skb->priority; - conn->state = RXRPC_CONN_REMOTELY_ABORTED; - set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); - rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, sp->hdr.serial); - return -ECONNABORTED; - case RXRPC_PACKET_TYPE_CHALLENGE: - return conn->security->respond_to_challenge(conn, skb, - _abort_code); + return conn->security->respond_to_challenge(conn, skb); case RXRPC_PACKET_TYPE_RESPONSE: - ret = conn->security->verify_response(conn, skb, _abort_code); + ret = conn->security->verify_response(conn, skb); if (ret < 0) return ret; @@ -324,27 +244,25 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, if (ret < 0) return ret; - spin_lock(&conn->bundle->channel_lock); spin_lock(&conn->state_lock); - - if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) { + if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) conn->state = RXRPC_CONN_SERVICE; - spin_unlock(&conn->state_lock); - for (loop = 0; loop < RXRPC_MAXCALLS; loop++) - rxrpc_call_is_secure( - rcu_dereference_protected( - conn->channels[loop].call, - lockdep_is_held(&conn->bundle->channel_lock))); - } else { - spin_unlock(&conn->state_lock); - } + spin_unlock(&conn->state_lock); - spin_unlock(&conn->bundle->channel_lock); + if (conn->state == RXRPC_CONN_SERVICE) { + /* Offload call state flipping to the I/O thread. As + * we've already received the packet, put it on the + * front of the queue. 
+ */ + skb->mark = RXRPC_SKB_MARK_SERVICE_CONN_SECURED; + rxrpc_get_skb(skb, rxrpc_skb_get_conn_secured); + skb_queue_head(&conn->local->rx_queue, skb); + rxrpc_wake_up_io_thread(conn->local); + } return 0; default: - trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, - tracepoint_string("bad_conn_pkt")); + WARN_ON_ONCE(1); return -EPROTO; } } @@ -354,26 +272,9 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, */ static void rxrpc_secure_connection(struct rxrpc_connection *conn) { - u32 abort_code; - int ret; - - _enter("{%d}", conn->debug_id); - - ASSERT(conn->security_ix != 0); - - if (conn->security->issue_challenge(conn) < 0) { - abort_code = RX_CALL_DEAD; - ret = -ENOMEM; - goto abort; - } - - _leave(""); - return; - -abort: - _debug("abort %d, %d", ret, abort_code); - rxrpc_abort_connection(conn, ret, abort_code); - _leave(" [aborted]"); + if (conn->security->issue_challenge(conn) < 0) + rxrpc_abort_conn(conn, NULL, RX_CALL_DEAD, -ENOMEM, + rxrpc_abort_nomem); } /* @@ -395,9 +296,7 @@ again: if (!test_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags)) continue; - smp_rmb(); /* vs rxrpc_disconnect_client_call */ - ack_at = READ_ONCE(chan->final_ack_at); - + ack_at = chan->final_ack_at; if (time_before(j, ack_at) && !force) { if (time_before(ack_at, next_j)) { next_j = ack_at; @@ -424,47 +323,27 @@ again: static void rxrpc_do_process_connection(struct rxrpc_connection *conn) { struct sk_buff *skb; - u32 abort_code = RX_PROTOCOL_ERROR; int ret; if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events)) rxrpc_secure_connection(conn); - /* Process delayed ACKs whose time has come. 
*/ - if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK) - rxrpc_process_delayed_final_acks(conn, false); - /* go through the conn-level event packets, releasing the ref on this * connection that each one has when we've finished with it */ while ((skb = skb_dequeue(&conn->rx_queue))) { rxrpc_see_skb(skb, rxrpc_skb_see_conn_work); - ret = rxrpc_process_event(conn, skb, &abort_code); + ret = rxrpc_process_event(conn, skb); switch (ret) { - case -EPROTO: - case -EKEYEXPIRED: - case -EKEYREJECTED: - goto protocol_error; case -ENOMEM: case -EAGAIN: - goto requeue_and_leave; - case -ECONNABORTED: + skb_queue_head(&conn->rx_queue, skb); + rxrpc_queue_conn(conn, rxrpc_conn_queue_retry_work); + break; default: rxrpc_free_skb(skb, rxrpc_skb_put_conn_work); break; } } - - return; - -requeue_and_leave: - skb_queue_head(&conn->rx_queue, skb); - return; - -protocol_error: - if (rxrpc_abort_connection(conn, ret, abort_code) < 0) - goto requeue_and_leave; - rxrpc_free_skb(skb, rxrpc_skb_put_conn_work); - return; } void rxrpc_process_connection(struct work_struct *work) @@ -498,44 +377,59 @@ static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn, /* * Input a connection-level packet. */ -int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb) +bool rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) { - _leave(" = -ECONNABORTED [%u]", conn->state); - return -ECONNABORTED; - } - - _enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial); - switch (sp->hdr.type) { - case RXRPC_PACKET_TYPE_DATA: - case RXRPC_PACKET_TYPE_ACK: - rxrpc_conn_retransmit_call(conn, skb, - sp->hdr.cid & RXRPC_CHANNELMASK); - return 0; - case RXRPC_PACKET_TYPE_BUSY: /* Just ignore BUSY packets for now. 
*/ - return 0; + return true; case RXRPC_PACKET_TYPE_ABORT: - conn->error = -ECONNABORTED; - conn->abort_code = skb->priority; - conn->state = RXRPC_CONN_REMOTELY_ABORTED; - set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); - rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, sp->hdr.serial); - return -ECONNABORTED; + if (rxrpc_is_conn_aborted(conn)) + return true; + rxrpc_input_conn_abort(conn, skb); + rxrpc_abort_calls(conn); + return true; case RXRPC_PACKET_TYPE_CHALLENGE: case RXRPC_PACKET_TYPE_RESPONSE: + if (rxrpc_is_conn_aborted(conn)) { + if (conn->completion == RXRPC_CALL_LOCALLY_ABORTED) + rxrpc_send_conn_abort(conn); + return true; + } rxrpc_post_packet_to_conn(conn, skb); - return 0; + return true; default: - trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, - tracepoint_string("bad_conn_pkt")); - return -EPROTO; + WARN_ON_ONCE(1); + return true; } } + +/* + * Input a connection event. + */ +void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb) +{ + unsigned int loop; + + if (test_and_clear_bit(RXRPC_CONN_EV_ABORT_CALLS, &conn->events)) + rxrpc_abort_calls(conn); + + switch (skb->mark) { + case RXRPC_SKB_MARK_SERVICE_CONN_SECURED: + if (conn->state != RXRPC_CONN_SERVICE) + break; + + for (loop = 0; loop < RXRPC_MAXCALLS; loop++) + rxrpc_call_is_secure(conn->channels[loop].call); + break; + } + + /* Process delayed ACKs whose time has come. 
*/ + if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK) + rxrpc_process_delayed_final_acks(conn, false); +} diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 3c8f83dacb2b..ac85d4644a3c 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -23,12 +23,30 @@ static void rxrpc_clean_up_connection(struct work_struct *work); static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet, unsigned long reap_at); +void rxrpc_poke_conn(struct rxrpc_connection *conn, enum rxrpc_conn_trace why) +{ + struct rxrpc_local *local = conn->local; + bool busy; + + if (WARN_ON_ONCE(!local)) + return; + + spin_lock_bh(&local->lock); + busy = !list_empty(&conn->attend_link); + if (!busy) { + rxrpc_get_connection(conn, why); + list_add_tail(&conn->attend_link, &local->conn_attend_q); + } + spin_unlock_bh(&local->lock); + rxrpc_wake_up_io_thread(local); +} + static void rxrpc_connection_timer(struct timer_list *timer) { struct rxrpc_connection *conn = container_of(timer, struct rxrpc_connection, timer); - rxrpc_queue_conn(conn, rxrpc_conn_queue_timer); + rxrpc_poke_conn(conn, rxrpc_conn_get_poke_timer); } /* @@ -49,6 +67,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *rxnet, INIT_WORK(&conn->destructor, rxrpc_clean_up_connection); INIT_LIST_HEAD(&conn->proc_link); INIT_LIST_HEAD(&conn->link); + mutex_init(&conn->security_lock); skb_queue_head_init(&conn->rx_queue); conn->rxnet = rxnet; conn->security = &rxrpc_no_security; @@ -82,10 +101,10 @@ struct rxrpc_connection *rxrpc_find_client_connection_rcu(struct rxrpc_local *lo _enter(",%x", sp->hdr.cid & RXRPC_CIDMASK); - /* Look up client connections by connection ID alone as their IDs are - * unique for this machine. + /* Look up client connections by connection ID alone as their + * IDs are unique for this machine. 
*/ - conn = idr_find(&rxrpc_client_conn_ids, sp->hdr.cid >> RXRPC_CIDSHIFT); + conn = idr_find(&local->conn_ids, sp->hdr.cid >> RXRPC_CIDSHIFT); if (!conn || refcount_read(&conn->ref) == 0) { _debug("no conn"); goto not_found; @@ -139,7 +158,7 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn, _enter("%d,%x", conn->debug_id, call->cid); - if (rcu_access_pointer(chan->call) == call) { + if (chan->call == call) { /* Save the result of the call so that we can repeat it if necessary * through the channel, whilst disposing of the actual call record. */ @@ -159,12 +178,9 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn, break; } - /* Sync with rxrpc_conn_retransmit(). */ - smp_wmb(); chan->last_call = chan->call_id; chan->call_id = chan->call_counter; - - rcu_assign_pointer(chan->call, NULL); + chan->call = NULL; } _leave(""); @@ -178,6 +194,9 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) { struct rxrpc_connection *conn = call->conn; + set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); + rxrpc_see_call(call, rxrpc_call_see_disconnected); + call->peer->cong_ssthresh = call->cong_ssthresh; if (!hlist_unhashed(&call->error_link)) { @@ -186,18 +205,17 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) spin_unlock(&call->peer->lock); } - if (rxrpc_is_client_call(call)) - return rxrpc_disconnect_client_call(conn->bundle, call); - - spin_lock(&conn->bundle->channel_lock); - __rxrpc_disconnect_call(conn, call); - spin_unlock(&conn->bundle->channel_lock); + if (rxrpc_is_client_call(call)) { + rxrpc_disconnect_client_call(call->bundle, call); + } else { + __rxrpc_disconnect_call(conn, call); + conn->idle_timestamp = jiffies; + if (atomic_dec_and_test(&conn->active)) + rxrpc_set_service_reap_timer(conn->rxnet, + jiffies + rxrpc_connection_expiry); + } - set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); - conn->idle_timestamp = jiffies; - if (atomic_dec_and_test(&conn->active)) - rxrpc_set_service_reap_timer(conn->rxnet, - jiffies + 
rxrpc_connection_expiry); + rxrpc_put_call(call, rxrpc_call_put_io_thread); } /* @@ -293,10 +311,10 @@ static void rxrpc_clean_up_connection(struct work_struct *work) container_of(work, struct rxrpc_connection, destructor); struct rxrpc_net *rxnet = conn->rxnet; - ASSERT(!rcu_access_pointer(conn->channels[0].call) && - !rcu_access_pointer(conn->channels[1].call) && - !rcu_access_pointer(conn->channels[2].call) && - !rcu_access_pointer(conn->channels[3].call)); + ASSERT(!conn->channels[0].call && + !conn->channels[1].call && + !conn->channels[2].call && + !conn->channels[3].call); ASSERT(list_empty(&conn->cache_link)); del_timer_sync(&conn->timer); @@ -447,7 +465,6 @@ void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet) _enter(""); atomic_dec(&rxnet->nr_conns); - rxrpc_destroy_all_client_connections(rxnet); del_timer_sync(&rxnet->service_conn_reap_timer); rxrpc_queue_work(&rxnet->service_conn_reaper); diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index 2a55a88b2a5b..f30323de82bd 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -11,7 +11,6 @@ static struct rxrpc_bundle rxrpc_service_dummy_bundle = { .ref = REFCOUNT_INIT(1), .debug_id = UINT_MAX, - .channel_lock = __SPIN_LOCK_UNLOCKED(&rxrpc_service_dummy_bundle.channel_lock), }; /* diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index d0e20e946e48..367927a99881 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -9,11 +9,10 @@ #include "ar-internal.h" -static void rxrpc_proto_abort(const char *why, - struct rxrpc_call *call, rxrpc_seq_t seq) +static void rxrpc_proto_abort(struct rxrpc_call *call, rxrpc_seq_t seq, + enum rxrpc_abort_reason why) { - if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, -EBADMSG)) - rxrpc_send_abort_packet(call); + rxrpc_abort_call(call, seq, RX_PROTOCOL_ERROR, -EBADMSG, why); } /* @@ -185,7 +184,7 @@ void rxrpc_congestion_degrade(struct rxrpc_call *call) if (call->cong_mode != RXRPC_CALL_SLOW_START && call->cong_mode != 
RXRPC_CALL_CONGEST_AVOIDANCE) return; - if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) + if (__rxrpc_call_state(call) == RXRPC_CALL_CLIENT_AWAIT_REPLY) return; rtt = ns_to_ktime(call->peer->srtt_us * (1000 / 8)); @@ -250,47 +249,34 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, * This occurs when we get an ACKALL packet, the first DATA packet of a reply, * or a final ACK packet. */ -static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, - const char *abort_why) +static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, + enum rxrpc_abort_reason abort_why) { - unsigned int state; - ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags)); - write_lock(&call->state_lock); - - state = call->state; - switch (state) { + switch (__rxrpc_call_state(call)) { case RXRPC_CALL_CLIENT_SEND_REQUEST: case RXRPC_CALL_CLIENT_AWAIT_REPLY: - if (reply_begun) - call->state = state = RXRPC_CALL_CLIENT_RECV_REPLY; - else - call->state = state = RXRPC_CALL_CLIENT_AWAIT_REPLY; + if (reply_begun) { + rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_RECV_REPLY); + trace_rxrpc_txqueue(call, rxrpc_txqueue_end); + break; + } + + rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_AWAIT_REPLY); + trace_rxrpc_txqueue(call, rxrpc_txqueue_await_reply); break; case RXRPC_CALL_SERVER_AWAIT_ACK: - __rxrpc_call_completed(call); - state = call->state; + rxrpc_call_completed(call); + trace_rxrpc_txqueue(call, rxrpc_txqueue_end); break; default: - goto bad_state; + kdebug("end_tx %s", rxrpc_call_states[__rxrpc_call_state(call)]); + rxrpc_proto_abort(call, call->tx_top, abort_why); + break; } - - write_unlock(&call->state_lock); - if (state == RXRPC_CALL_CLIENT_AWAIT_REPLY) - trace_rxrpc_txqueue(call, rxrpc_txqueue_await_reply); - else - trace_rxrpc_txqueue(call, rxrpc_txqueue_end); - _leave(" = ok"); - return true; - -bad_state: - write_unlock(&call->state_lock); - kdebug("end_tx %s", rxrpc_call_states[call->state]); - rxrpc_proto_abort(abort_why, 
call, call->tx_top); - return false; } /* @@ -305,18 +291,48 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call) if (call->ackr_reason) { now = jiffies; timo = now + MAX_JIFFY_OFFSET; - WRITE_ONCE(call->resend_at, timo); + WRITE_ONCE(call->delay_ack_at, timo); trace_rxrpc_timer(call, rxrpc_timer_init_for_reply, now); } if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { if (!rxrpc_rotate_tx_window(call, top, &summary)) { - rxrpc_proto_abort("TXL", call, top); + rxrpc_proto_abort(call, top, rxrpc_eproto_early_reply); return false; } } - return rxrpc_end_tx_phase(call, true, "ETD"); + + rxrpc_end_tx_phase(call, true, rxrpc_eproto_unexpected_reply); + return true; +} + +/* + * End the packet reception phase. + */ +static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) +{ + rxrpc_seq_t whigh = READ_ONCE(call->rx_highest_seq); + + _enter("%d,%s", call->debug_id, rxrpc_call_states[__rxrpc_call_state(call)]); + + trace_rxrpc_receive(call, rxrpc_receive_end, 0, whigh); + + switch (__rxrpc_call_state(call)) { + case RXRPC_CALL_CLIENT_RECV_REPLY: + rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_terminal_ack); + rxrpc_call_completed(call); + break; + + case RXRPC_CALL_SERVER_RECV_REQUEST: + rxrpc_set_call_state(call, RXRPC_CALL_SERVER_ACK_REQUEST); + call->expect_req_by = jiffies + MAX_JIFFY_OFFSET; + rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_processing_op); + break; + + default: + break; + } } static void rxrpc_input_update_ack_window(struct rxrpc_call *call, @@ -337,8 +353,9 @@ static void rxrpc_input_queue_data(struct rxrpc_call *call, struct sk_buff *skb, __skb_queue_tail(&call->recvmsg_queue, skb); rxrpc_input_update_ack_window(call, window, wtop); - trace_rxrpc_receive(call, last ? 
why + 1 : why, sp->hdr.serial, sp->hdr.seq); + if (last) + rxrpc_end_rx_phase(call, sp->hdr.serial); } /* @@ -366,17 +383,14 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb, if (last) { if (test_and_set_bit(RXRPC_CALL_RX_LAST, &call->flags) && - seq + 1 != wtop) { - rxrpc_proto_abort("LSN", call, seq); - return; - } + seq + 1 != wtop) + return rxrpc_proto_abort(call, seq, rxrpc_eproto_different_last); } else { if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && after_eq(seq, wtop)) { pr_warn("Packet beyond last: c=%x q=%x window=%x-%x wlimit=%x\n", call->debug_id, seq, window, wtop, wlimit); - rxrpc_proto_abort("LSA", call, seq); - return; + return rxrpc_proto_abort(call, seq, rxrpc_eproto_data_after_last); } } @@ -550,7 +564,6 @@ protocol_error: static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - enum rxrpc_call_state state; rxrpc_serial_t serial = sp->hdr.serial; rxrpc_seq_t seq0 = sp->hdr.seq; @@ -558,11 +571,20 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) atomic64_read(&call->ackr_window), call->rx_highest_seq, skb->len, seq0); - state = READ_ONCE(call->state); - if (state >= RXRPC_CALL_COMPLETE) + if (__rxrpc_call_is_complete(call)) return; - if (state == RXRPC_CALL_SERVER_RECV_REQUEST) { + switch (__rxrpc_call_state(call)) { + case RXRPC_CALL_CLIENT_SEND_REQUEST: + case RXRPC_CALL_CLIENT_AWAIT_REPLY: + /* Received data implicitly ACKs all of the request + * packets we sent when we're acting as a client. 
+ */ + if (!rxrpc_receiving_reply(call)) + goto out_notify; + break; + + case RXRPC_CALL_SERVER_RECV_REQUEST: { unsigned long timo = READ_ONCE(call->next_req_timo); unsigned long now, expect_req_by; @@ -573,18 +595,15 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_reduce_call_timer(call, expect_req_by, now, rxrpc_timer_set_for_idle); } + break; } - /* Received data implicitly ACKs all of the request packets we sent - * when we're acting as a client. - */ - if ((state == RXRPC_CALL_CLIENT_SEND_REQUEST || - state == RXRPC_CALL_CLIENT_AWAIT_REPLY) && - !rxrpc_receiving_reply(call)) - goto out_notify; + default: + break; + } if (!rxrpc_input_split_jumbo(call, skb)) { - rxrpc_proto_abort("VLD", call, sp->hdr.seq); + rxrpc_proto_abort(call, sp->hdr.seq, rxrpc_badmsg_bad_jumbo); goto out_notify; } skb = NULL; @@ -765,7 +784,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) offset = sizeof(struct rxrpc_wire_header); if (skb_copy_bits(skb, offset, &ack, sizeof(ack)) < 0) - return rxrpc_proto_abort("XAK", call, 0); + return rxrpc_proto_abort(call, 0, rxrpc_badmsg_short_ack); offset += sizeof(ack); ack_serial = sp->hdr.serial; @@ -845,7 +864,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) ioffset = offset + nr_acks + 3; if (skb->len >= ioffset + sizeof(info) && skb_copy_bits(skb, ioffset, &info, sizeof(info)) < 0) - return rxrpc_proto_abort("XAI", call, 0); + return rxrpc_proto_abort(call, 0, rxrpc_badmsg_short_ack_info); if (nr_acks > 0) skb_condense(skb); @@ -868,10 +887,10 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_input_ackinfo(call, skb, &info); if (first_soft_ack == 0) - return rxrpc_proto_abort("AK0", call, 0); + return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_zero); /* Ignore ACKs unless we are or have just been transmitting. 
*/ - switch (READ_ONCE(call->state)) { + switch (__rxrpc_call_state(call)) { case RXRPC_CALL_CLIENT_SEND_REQUEST: case RXRPC_CALL_CLIENT_AWAIT_REPLY: case RXRPC_CALL_SERVER_SEND_REPLY: @@ -883,20 +902,20 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) if (before(hard_ack, call->acks_hard_ack) || after(hard_ack, call->tx_top)) - return rxrpc_proto_abort("AKW", call, 0); + return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_outside_window); if (nr_acks > call->tx_top - hard_ack) - return rxrpc_proto_abort("AKN", call, 0); + return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_sack_overflow); if (after(hard_ack, call->acks_hard_ack)) { if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) { - rxrpc_end_tx_phase(call, false, "ETA"); + rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ack); return; } } if (nr_acks > 0) { if (offset > (int)skb->len - nr_acks) - return rxrpc_proto_abort("XSA", call, 0); + return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_short_sack); rxrpc_input_soft_acks(call, skb->data + offset, first_soft_ack, nr_acks, &summary); } @@ -918,7 +937,7 @@ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb) struct rxrpc_ack_summary summary = { 0 }; if (rxrpc_rotate_tx_window(call, call->tx_top, &summary)) - rxrpc_end_tx_phase(call, false, "ETL"); + rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ackall); } /* @@ -963,27 +982,23 @@ void rxrpc_input_call_packet(struct rxrpc_call *call, struct sk_buff *skb) switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_DATA: - rxrpc_input_data(call, skb); - break; + return rxrpc_input_data(call, skb); case RXRPC_PACKET_TYPE_ACK: - rxrpc_input_ack(call, skb); - break; + return rxrpc_input_ack(call, skb); case RXRPC_PACKET_TYPE_BUSY: /* Just ignore BUSY packets from the server; the retry and * lifespan timers will take care of business. BUSY packets * from the client don't make sense. 
*/ - break; + return; case RXRPC_PACKET_TYPE_ABORT: - rxrpc_input_abort(call, skb); - break; + return rxrpc_input_abort(call, skb); case RXRPC_PACKET_TYPE_ACKALL: - rxrpc_input_ackall(call, skb); - break; + return rxrpc_input_ackall(call, skb); default: break; @@ -998,24 +1013,18 @@ void rxrpc_input_call_packet(struct rxrpc_call *call, struct sk_buff *skb) */ void rxrpc_implicit_end_call(struct rxrpc_call *call, struct sk_buff *skb) { - struct rxrpc_connection *conn = call->conn; - - switch (READ_ONCE(call->state)) { + switch (__rxrpc_call_state(call)) { case RXRPC_CALL_SERVER_AWAIT_ACK: rxrpc_call_completed(call); fallthrough; case RXRPC_CALL_COMPLETE: break; default: - if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, -ESHUTDOWN)) - rxrpc_send_abort_packet(call); + rxrpc_abort_call(call, 0, RX_CALL_DEAD, -ESHUTDOWN, + rxrpc_eproto_improper_term); trace_rxrpc_improper_term(call); break; } rxrpc_input_call_event(call, skb); - - spin_lock(&conn->bundle->channel_lock); - __rxrpc_disconnect_call(conn, call); - spin_unlock(&conn->bundle->channel_lock); } diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c index 0eb8471bfc53..34353b6e584b 100644 --- a/net/rxrpc/insecure.c +++ b/net/rxrpc/insecure.c @@ -43,25 +43,17 @@ static void none_free_call_crypto(struct rxrpc_call *call) } static int none_respond_to_challenge(struct rxrpc_connection *conn, - struct sk_buff *skb, - u32 *_abort_code) + struct sk_buff *skb) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - - trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, - tracepoint_string("chall_none")); - return -EPROTO; + return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO, + rxrpc_eproto_rxnull_challenge); } static int none_verify_response(struct rxrpc_connection *conn, - struct sk_buff *skb, - u32 *_abort_code) + struct sk_buff *skb) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - - trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, - tracepoint_string("resp_none")); - return -EPROTO; + return rxrpc_abort_conn(conn, skb, 
RX_PROTOCOL_ERROR, -EPROTO, + rxrpc_eproto_rxnull_response); } static void none_clear(struct rxrpc_connection *conn) diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c index 1ad067d66fb6..9e9dfb2fc559 100644 --- a/net/rxrpc/io_thread.c +++ b/net/rxrpc/io_thread.c @@ -67,9 +67,31 @@ void rxrpc_error_report(struct sock *sk) } /* + * Directly produce an abort from a packet. + */ +bool rxrpc_direct_abort(struct sk_buff *skb, enum rxrpc_abort_reason why, + s32 abort_code, int err) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + trace_rxrpc_abort(0, why, sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + abort_code, err); + skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; + skb->priority = abort_code; + return false; +} + +static bool rxrpc_bad_message(struct sk_buff *skb, enum rxrpc_abort_reason why) +{ + return rxrpc_direct_abort(skb, why, RX_PROTOCOL_ERROR, -EBADMSG); +} + +#define just_discard true + +/* * Process event packets targeted at a local endpoint. */ -static void rxrpc_input_version(struct rxrpc_local *local, struct sk_buff *skb) +static bool rxrpc_input_version(struct rxrpc_local *local, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); char v; @@ -81,22 +103,21 @@ static void rxrpc_input_version(struct rxrpc_local *local, struct sk_buff *skb) if (v == 0) rxrpc_send_version_request(local, &sp->hdr, skb); } + + return true; } /* * Extract the wire header from a packet and translate the byte order. 
*/ -static noinline -int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) +static bool rxrpc_extract_header(struct rxrpc_skb_priv *sp, + struct sk_buff *skb) { struct rxrpc_wire_header whdr; /* dig out the RxRPC connection details */ - if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) { - trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, - tracepoint_string("bad_hdr")); - return -EBADMSG; - } + if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) + return rxrpc_bad_message(skb, rxrpc_badmsg_short_hdr); memset(sp, 0, sizeof(*sp)); sp->hdr.epoch = ntohl(whdr.epoch); @@ -110,7 +131,7 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) sp->hdr.securityIndex = whdr.securityIndex; sp->hdr._rsvd = ntohs(whdr._rsvd); sp->hdr.serviceId = ntohs(whdr.serviceId); - return 0; + return true; } /* @@ -130,28 +151,28 @@ static bool rxrpc_extract_abort(struct sk_buff *skb) /* * Process packets received on the local endpoint */ -static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) +static bool rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) { struct rxrpc_connection *conn; struct sockaddr_rxrpc peer_srx; struct rxrpc_skb_priv *sp; struct rxrpc_peer *peer = NULL; struct sk_buff *skb = *_skb; - int ret = 0; + bool ret = false; skb_pull(skb, sizeof(struct udphdr)); sp = rxrpc_skb(skb); /* dig out the RxRPC connection details */ - if (rxrpc_extract_header(sp, skb) < 0) - goto bad_message; + if (!rxrpc_extract_header(sp, skb)) + return just_discard; if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { static int lose; if ((lose++ & 7) == 7) { trace_rxrpc_rx_lose(sp); - return 0; + return just_discard; } } @@ -160,28 +181,28 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_VERSION: if (rxrpc_to_client(sp)) - return 0; - rxrpc_input_version(local, skb); - return 0; + return just_discard; + return rxrpc_input_version(local, skb); 
case RXRPC_PACKET_TYPE_BUSY: if (rxrpc_to_server(sp)) - return 0; + return just_discard; fallthrough; case RXRPC_PACKET_TYPE_ACK: case RXRPC_PACKET_TYPE_ACKALL: if (sp->hdr.callNumber == 0) - goto bad_message; + return rxrpc_bad_message(skb, rxrpc_badmsg_zero_call); break; case RXRPC_PACKET_TYPE_ABORT: if (!rxrpc_extract_abort(skb)) - return 0; /* Just discard if malformed */ + return just_discard; /* Just discard if malformed */ break; case RXRPC_PACKET_TYPE_DATA: - if (sp->hdr.callNumber == 0 || - sp->hdr.seq == 0) - goto bad_message; + if (sp->hdr.callNumber == 0) + return rxrpc_bad_message(skb, rxrpc_badmsg_zero_call); + if (sp->hdr.seq == 0) + return rxrpc_bad_message(skb, rxrpc_badmsg_zero_seq); /* Unshare the packet so that it can be modified for in-place * decryption. @@ -191,7 +212,7 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) if (!skb) { rxrpc_eaten_skb(*_skb, rxrpc_skb_eaten_by_unshare_nomem); *_skb = NULL; - return 0; + return just_discard; } if (skb != *_skb) { @@ -205,28 +226,28 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) case RXRPC_PACKET_TYPE_CHALLENGE: if (rxrpc_to_server(sp)) - return 0; + return just_discard; break; case RXRPC_PACKET_TYPE_RESPONSE: if (rxrpc_to_client(sp)) - return 0; + return just_discard; break; /* Packet types 9-11 should just be ignored. */ case RXRPC_PACKET_TYPE_PARAMS: case RXRPC_PACKET_TYPE_10: case RXRPC_PACKET_TYPE_11: - return 0; + return just_discard; default: - goto bad_message; + return rxrpc_bad_message(skb, rxrpc_badmsg_unsupported_packet); } if (sp->hdr.serviceId == 0) - goto bad_message; + return rxrpc_bad_message(skb, rxrpc_badmsg_zero_service); if (WARN_ON_ONCE(rxrpc_extract_addr_from_skb(&peer_srx, skb) < 0)) - return true; /* Unsupported address type - discard. */ + return just_discard; /* Unsupported address type. 
*/ if (peer_srx.transport.family != local->srx.transport.family && (peer_srx.transport.family == AF_INET && @@ -234,7 +255,7 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) pr_warn_ratelimited("AF_RXRPC: Protocol mismatch %u not %u\n", peer_srx.transport.family, local->srx.transport.family); - return true; /* Wrong address type - discard. */ + return just_discard; /* Wrong address type. */ } if (rxrpc_to_client(sp)) { @@ -242,12 +263,8 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) conn = rxrpc_find_client_connection_rcu(local, &peer_srx, skb); conn = rxrpc_get_connection_maybe(conn, rxrpc_conn_get_call_input); rcu_read_unlock(); - if (!conn) { - trace_rxrpc_abort(0, "NCC", sp->hdr.cid, - sp->hdr.callNumber, sp->hdr.seq, - RXKADINCONSISTENCY, EBADMSG); - goto protocol_error; - } + if (!conn) + return rxrpc_protocol_error(skb, rxrpc_eproto_no_client_conn); ret = rxrpc_input_packet_on_conn(conn, &peer_srx, skb); rxrpc_put_connection(conn, rxrpc_conn_put_call_input); @@ -280,19 +297,7 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) ret = rxrpc_new_incoming_call(local, peer, NULL, &peer_srx, skb); rxrpc_put_peer(peer, rxrpc_peer_put_input); - if (ret < 0) - goto reject_packet; - return 0; - -bad_message: - trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - RX_PROTOCOL_ERROR, EBADMSG); -protocol_error: - skb->priority = RX_PROTOCOL_ERROR; - skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; -reject_packet: - rxrpc_reject_packet(local, skb); - return 0; + return ret; } /* @@ -306,21 +311,23 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn, struct rxrpc_channel *chan; struct rxrpc_call *call = NULL; unsigned int channel; + bool ret; if (sp->hdr.securityIndex != conn->security_ix) - goto wrong_security; + return rxrpc_direct_abort(skb, rxrpc_eproto_wrong_security, + RXKADINCONSISTENCY, -EBADMSG); if (sp->hdr.serviceId != 
conn->service_id) { int old_id; if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags)) - goto reupgrade; + return rxrpc_protocol_error(skb, rxrpc_eproto_reupgrade); + old_id = cmpxchg(&conn->service_id, conn->orig_service_id, sp->hdr.serviceId); - if (old_id != conn->orig_service_id && old_id != sp->hdr.serviceId) - goto reupgrade; + return rxrpc_protocol_error(skb, rxrpc_eproto_bad_upgrade); } if (after(sp->hdr.serial, conn->hi_serial)) @@ -336,19 +343,19 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn, /* Ignore really old calls */ if (sp->hdr.callNumber < chan->last_call) - return 0; + return just_discard; if (sp->hdr.callNumber == chan->last_call) { if (chan->call || sp->hdr.type == RXRPC_PACKET_TYPE_ABORT) - return 0; + return just_discard; /* For the previous service call, if completed successfully, we * discard all further packets. */ if (rxrpc_conn_is_service(conn) && chan->last_type == RXRPC_PACKET_TYPE_ACK) - return 0; + return just_discard; /* But otherwise we need to retransmit the final packet from * data cached in the connection record. 
@@ -358,19 +365,17 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn, sp->hdr.seq, sp->hdr.serial, sp->hdr.flags); - rxrpc_input_conn_packet(conn, skb); - return 0; + rxrpc_conn_retransmit_call(conn, skb, channel); + return just_discard; } - rcu_read_lock(); - call = rxrpc_try_get_call(rcu_dereference(chan->call), - rxrpc_call_get_input); - rcu_read_unlock(); + call = rxrpc_try_get_call(chan->call, rxrpc_call_get_input); if (sp->hdr.callNumber > chan->call_id) { if (rxrpc_to_client(sp)) { rxrpc_put_call(call, rxrpc_call_put_input); - goto reject_packet; + return rxrpc_protocol_error(skb, + rxrpc_eproto_unexpected_implicit_end); } if (call) { @@ -382,38 +387,14 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn, if (!call) { if (rxrpc_to_client(sp)) - goto bad_message; - if (rxrpc_new_incoming_call(conn->local, conn->peer, conn, - peer_srx, skb) == 0) - return 0; - goto reject_packet; + return rxrpc_protocol_error(skb, rxrpc_eproto_no_client_call); + return rxrpc_new_incoming_call(conn->local, conn->peer, conn, + peer_srx, skb); } - rxrpc_input_call_event(call, skb); + ret = rxrpc_input_call_event(call, skb); rxrpc_put_call(call, rxrpc_call_put_input); - return 0; - -wrong_security: - trace_rxrpc_abort(0, "SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - RXKADINCONSISTENCY, EBADMSG); - skb->priority = RXKADINCONSISTENCY; - goto post_abort; - -reupgrade: - trace_rxrpc_abort(0, "UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - RX_PROTOCOL_ERROR, EBADMSG); - goto protocol_error; - -bad_message: - trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - RX_PROTOCOL_ERROR, EBADMSG); -protocol_error: - skb->priority = RX_PROTOCOL_ERROR; -post_abort: - skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; -reject_packet: - rxrpc_reject_packet(conn->local, skb); - return 0; + return ret; } /* @@ -421,6 +402,7 @@ reject_packet: */ int rxrpc_io_thread(void *data) { + struct rxrpc_connection *conn; struct sk_buff_head 
rx_queue; struct rxrpc_local *local = data; struct rxrpc_call *call; @@ -436,6 +418,24 @@ int rxrpc_io_thread(void *data) for (;;) { rxrpc_inc_stat(local->rxnet, stat_io_loop); + /* Deal with connections that want immediate attention. */ + conn = list_first_entry_or_null(&local->conn_attend_q, + struct rxrpc_connection, + attend_link); + if (conn) { + spin_lock_bh(&local->lock); + list_del_init(&conn->attend_link); + spin_unlock_bh(&local->lock); + + rxrpc_input_conn_event(conn, NULL); + rxrpc_put_connection(conn, rxrpc_conn_put_poke); + continue; + } + + if (test_and_clear_bit(RXRPC_CLIENT_CONN_REAP_TIMER, + &local->client_conn_flags)) + rxrpc_discard_expired_client_conns(local); + /* Deal with calls that want immediate attention. */ if ((call = list_first_entry_or_null(&local->call_attend_q, struct rxrpc_call, @@ -450,12 +450,17 @@ int rxrpc_io_thread(void *data) continue; } + if (!list_empty(&local->new_client_calls)) + rxrpc_connect_client_calls(local); + /* Process received packets and errors. 
*/ if ((skb = __skb_dequeue(&rx_queue))) { + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); switch (skb->mark) { case RXRPC_SKB_MARK_PACKET: skb->priority = 0; - rxrpc_input_packet(local, &skb); + if (!rxrpc_input_packet(local, &skb)) + rxrpc_reject_packet(local, skb); trace_rxrpc_rx_done(skb->mark, skb->priority); rxrpc_free_skb(skb, rxrpc_skb_put_input); break; @@ -463,6 +468,11 @@ int rxrpc_io_thread(void *data) rxrpc_input_error(local, skb); rxrpc_free_skb(skb, rxrpc_skb_put_error_report); break; + case RXRPC_SKB_MARK_SERVICE_CONN_SECURED: + rxrpc_input_conn_event(sp->conn, skb); + rxrpc_put_connection(sp->conn, rxrpc_conn_put_poke); + rxrpc_free_skb(skb, rxrpc_skb_put_conn_secured); + break; default: WARN_ON_ONCE(1); rxrpc_free_skb(skb, rxrpc_skb_put_unknown); @@ -481,7 +491,11 @@ int rxrpc_io_thread(void *data) set_current_state(TASK_INTERRUPTIBLE); should_stop = kthread_should_stop(); if (!skb_queue_empty(&local->rx_queue) || - !list_empty(&local->call_attend_q)) { + !list_empty(&local->call_attend_q) || + !list_empty(&local->conn_attend_q) || + !list_empty(&local->new_client_calls) || + test_bit(RXRPC_CLIENT_CONN_REAP_TIMER, + &local->client_conn_flags)) { __set_current_state(TASK_RUNNING); continue; } diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 270b63d8f37a..b8eaca5d9f22 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -82,31 +82,59 @@ static long rxrpc_local_cmp_key(const struct rxrpc_local *local, } } +static void rxrpc_client_conn_reap_timeout(struct timer_list *timer) +{ + struct rxrpc_local *local = + container_of(timer, struct rxrpc_local, client_conn_reap_timer); + + if (local->kill_all_client_conns && + test_and_set_bit(RXRPC_CLIENT_CONN_REAP_TIMER, &local->client_conn_flags)) + rxrpc_wake_up_io_thread(local); +} + /* * Allocate a new local endpoint. 
*/ -static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet, +static struct rxrpc_local *rxrpc_alloc_local(struct net *net, const struct sockaddr_rxrpc *srx) { struct rxrpc_local *local; + u32 tmp; local = kzalloc(sizeof(struct rxrpc_local), GFP_KERNEL); if (local) { refcount_set(&local->ref, 1); atomic_set(&local->active_users, 1); - local->rxnet = rxnet; + local->net = net; + local->rxnet = rxrpc_net(net); INIT_HLIST_NODE(&local->link); init_rwsem(&local->defrag_sem); init_completion(&local->io_thread_ready); skb_queue_head_init(&local->rx_queue); + INIT_LIST_HEAD(&local->conn_attend_q); INIT_LIST_HEAD(&local->call_attend_q); + local->client_bundles = RB_ROOT; spin_lock_init(&local->client_bundles_lock); + local->kill_all_client_conns = false; + INIT_LIST_HEAD(&local->idle_client_conns); + timer_setup(&local->client_conn_reap_timer, + rxrpc_client_conn_reap_timeout, 0); + spin_lock_init(&local->lock); rwlock_init(&local->services_lock); local->debug_id = atomic_inc_return(&rxrpc_debug_id); memcpy(&local->srx, srx, sizeof(*srx)); local->srx.srx_service = 0; + idr_init(&local->conn_ids); + get_random_bytes(&tmp, sizeof(tmp)); + tmp &= 0x3fffffff; + if (tmp == 0) + tmp = 1; + idr_set_cursor(&local->conn_ids, tmp); + INIT_LIST_HEAD(&local->new_client_calls); + spin_lock_init(&local->client_call_lock); + trace_rxrpc_local(local->debug_id, rxrpc_local_new, 1, 1); } @@ -248,7 +276,7 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, goto found; } - local = rxrpc_alloc_local(rxnet, srx); + local = rxrpc_alloc_local(net, srx); if (!local) goto nomem; @@ -407,6 +435,7 @@ void rxrpc_destroy_local(struct rxrpc_local *local) * local endpoint. 
*/ rxrpc_purge_queue(&local->rx_queue); + rxrpc_purge_client_connections(local); } /* diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c index 5905530e2f33..a0319c040c25 100644 --- a/net/rxrpc/net_ns.c +++ b/net/rxrpc/net_ns.c @@ -10,15 +10,6 @@ unsigned int rxrpc_net_id; -static void rxrpc_client_conn_reap_timeout(struct timer_list *timer) -{ - struct rxrpc_net *rxnet = - container_of(timer, struct rxrpc_net, client_conn_reap_timer); - - if (rxnet->live) - rxrpc_queue_work(&rxnet->client_conn_reaper); -} - static void rxrpc_service_conn_reap_timeout(struct timer_list *timer) { struct rxrpc_net *rxnet = @@ -63,14 +54,6 @@ static __net_init int rxrpc_init_net(struct net *net) rxrpc_service_conn_reap_timeout, 0); atomic_set(&rxnet->nr_client_conns, 0); - rxnet->kill_all_client_conns = false; - spin_lock_init(&rxnet->client_conn_cache_lock); - mutex_init(&rxnet->client_conn_discard_lock); - INIT_LIST_HEAD(&rxnet->idle_client_conns); - INIT_WORK(&rxnet->client_conn_reaper, - rxrpc_discard_expired_client_conns); - timer_setup(&rxnet->client_conn_reap_timer, - rxrpc_client_conn_reap_timeout, 0); INIT_HLIST_HEAD(&rxnet->local_endpoints); mutex_init(&rxnet->local_mutex); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 3d8c9f830ee0..a9746be29634 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -261,7 +261,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) rxrpc_tx_point_call_ack); rxrpc_tx_backoff(call, ret); - if (call->state < RXRPC_CALL_COMPLETE) { + if (!__rxrpc_call_is_complete(call)) { if (ret < 0) rxrpc_cancel_rtt_probe(call, serial, rtt_slot); rxrpc_set_keepalive(call); @@ -545,6 +545,62 @@ send_fragmentable: } /* + * Transmit a connection-level abort. 
+ */ +void rxrpc_send_conn_abort(struct rxrpc_connection *conn) +{ + struct rxrpc_wire_header whdr; + struct msghdr msg; + struct kvec iov[2]; + __be32 word; + size_t len; + u32 serial; + int ret; + + msg.msg_name = &conn->peer->srx.transport; + msg.msg_namelen = conn->peer->srx.transport_len; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + whdr.epoch = htonl(conn->proto.epoch); + whdr.cid = htonl(conn->proto.cid); + whdr.callNumber = 0; + whdr.seq = 0; + whdr.type = RXRPC_PACKET_TYPE_ABORT; + whdr.flags = conn->out_clientflag; + whdr.userStatus = 0; + whdr.securityIndex = conn->security_ix; + whdr._rsvd = 0; + whdr.serviceId = htons(conn->service_id); + + word = htonl(conn->abort_code); + + iov[0].iov_base = &whdr; + iov[0].iov_len = sizeof(whdr); + iov[1].iov_base = &word; + iov[1].iov_len = sizeof(word); + + len = iov[0].iov_len + iov[1].iov_len; + + serial = atomic_inc_return(&conn->serial); + whdr.serial = htonl(serial); + + iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len); + ret = do_udp_sendmsg(conn->local->socket, &msg, len); + if (ret < 0) { + trace_rxrpc_tx_fail(conn->debug_id, serial, ret, + rxrpc_tx_point_conn_abort); + _debug("sendmsg failed: %d", ret); + return; + } + + trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_conn_abort); + + conn->peer->last_tx_at = ktime_get_seconds(); +} + +/* * Reject a packet through the local endpoint. 
*/ void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb) @@ -667,7 +723,7 @@ void rxrpc_send_keepalive(struct rxrpc_peer *peer) static inline void rxrpc_instant_resend(struct rxrpc_call *call, struct rxrpc_txbuf *txb) { - if (call->state < RXRPC_CALL_COMPLETE) + if (!__rxrpc_call_is_complete(call)) kdebug("resend"); } diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 4eecea2be307..8d7a715a0bb1 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -147,10 +147,10 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local, * assess the MTU size for the network interface through which this peer is * reached */ -static void rxrpc_assess_MTU_size(struct rxrpc_sock *rx, +static void rxrpc_assess_MTU_size(struct rxrpc_local *local, struct rxrpc_peer *peer) { - struct net *net = sock_net(&rx->sk); + struct net *net = local->net; struct dst_entry *dst; struct rtable *rt; struct flowi fl; @@ -236,11 +236,11 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp, /* * Initialise peer record. */ -static void rxrpc_init_peer(struct rxrpc_sock *rx, struct rxrpc_peer *peer, +static void rxrpc_init_peer(struct rxrpc_local *local, struct rxrpc_peer *peer, unsigned long hash_key) { peer->hash_key = hash_key; - rxrpc_assess_MTU_size(rx, peer); + rxrpc_assess_MTU_size(local, peer); peer->mtu = peer->if_mtu; peer->rtt_last_req = ktime_get_real(); @@ -272,8 +272,7 @@ static void rxrpc_init_peer(struct rxrpc_sock *rx, struct rxrpc_peer *peer, /* * Set up a new peer. 
*/ -static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx, - struct rxrpc_local *local, +static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local, struct sockaddr_rxrpc *srx, unsigned long hash_key, gfp_t gfp) @@ -285,7 +284,7 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx, peer = rxrpc_alloc_peer(local, gfp, rxrpc_peer_new_client); if (peer) { memcpy(&peer->srx, srx, sizeof(*srx)); - rxrpc_init_peer(rx, peer, hash_key); + rxrpc_init_peer(local, peer, hash_key); } _leave(" = %p", peer); @@ -304,14 +303,13 @@ static void rxrpc_free_peer(struct rxrpc_peer *peer) * since we've already done a search in the list from the non-reentrant context * (the data_ready handler) that is the only place we can add new peers. */ -void rxrpc_new_incoming_peer(struct rxrpc_sock *rx, struct rxrpc_local *local, - struct rxrpc_peer *peer) +void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer) { struct rxrpc_net *rxnet = local->rxnet; unsigned long hash_key; hash_key = rxrpc_peer_hash_key(local, &peer->srx); - rxrpc_init_peer(rx, peer, hash_key); + rxrpc_init_peer(local, peer, hash_key); spin_lock(&rxnet->peer_hash_lock); hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key); @@ -322,8 +320,7 @@ void rxrpc_new_incoming_peer(struct rxrpc_sock *rx, struct rxrpc_local *local, /* * obtain a remote transport endpoint for the specified address */ -struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx, - struct rxrpc_local *local, +struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, struct sockaddr_rxrpc *srx, gfp_t gfp) { struct rxrpc_peer *peer, *candidate; @@ -343,7 +340,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx, /* The peer is not yet present in hash - create a candidate * for a new record and then redo the search. 
*/ - candidate = rxrpc_create_peer(rx, local, srx, hash_key, gfp); + candidate = rxrpc_create_peer(local, srx, hash_key, gfp); if (!candidate) { _leave(" = NULL [nomem]"); return NULL; diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index 3a59591ec061..750158a085cd 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -12,13 +12,13 @@ static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = { [RXRPC_CONN_UNUSED] = "Unused ", + [RXRPC_CONN_CLIENT_UNSECURED] = "ClUnsec ", [RXRPC_CONN_CLIENT] = "Client ", [RXRPC_CONN_SERVICE_PREALLOC] = "SvPrealc", [RXRPC_CONN_SERVICE_UNSECURED] = "SvUnsec ", [RXRPC_CONN_SERVICE_CHALLENGING] = "SvChall ", [RXRPC_CONN_SERVICE] = "SvSecure", - [RXRPC_CONN_REMOTELY_ABORTED] = "RmtAbort", - [RXRPC_CONN_LOCALLY_ABORTED] = "LocAbort", + [RXRPC_CONN_ABORTED] = "Aborted ", }; /* @@ -51,6 +51,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) struct rxrpc_local *local; struct rxrpc_call *call; struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + enum rxrpc_call_state state; unsigned long timeout = 0; rxrpc_seq_t acks_hard_ack; char lbuff[50], rbuff[50]; @@ -75,7 +76,8 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) sprintf(rbuff, "%pISpc", &call->dest_srx.transport); - if (call->state != RXRPC_CALL_SERVER_PREALLOC) { + state = rxrpc_call_state(call); + if (state != RXRPC_CALL_SERVER_PREALLOC) { timeout = READ_ONCE(call->expect_rx_by); timeout -= jiffies; } @@ -92,7 +94,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) call->call_id, rxrpc_is_service_call(call) ? 
"Svc" : "Clt", refcount_read(&call->ref), - rxrpc_call_states[call->state], + rxrpc_call_states[state], call->abort_code, call->debug_id, acks_hard_ack, READ_ONCE(call->tx_top) - acks_hard_ack, @@ -143,6 +145,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) { struct rxrpc_connection *conn; struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + const char *state; char lbuff[50], rbuff[50]; if (v == &rxnet->conn_proc_list) { @@ -163,9 +166,11 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) } sprintf(lbuff, "%pISpc", &conn->local->srx.transport); - sprintf(rbuff, "%pISpc", &conn->peer->srx.transport); print: + state = rxrpc_is_conn_aborted(conn) ? + rxrpc_call_completions[conn->completion] : + rxrpc_conn_states[conn->state]; seq_printf(seq, "UDP %-47.47s %-47.47s %4x %08x %s %3u %3d" " %s %08x %08x %08x %08x %08x %08x %08x\n", @@ -176,7 +181,7 @@ print: rxrpc_conn_is_service(conn) ? "Svc" : "Clt", refcount_read(&conn->ref), atomic_read(&conn->active), - rxrpc_conn_states[conn->state], + state, key_serial(conn->key), atomic_read(&conn->serial), conn->hi_serial, diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 6ebd6440a2b7..dd54ceee7bcc 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -59,85 +59,6 @@ void rxrpc_notify_socket(struct rxrpc_call *call) } /* - * Transition a call to the complete state. 
- */ -bool __rxrpc_set_call_completion(struct rxrpc_call *call, - enum rxrpc_call_completion compl, - u32 abort_code, - int error) -{ - if (call->state < RXRPC_CALL_COMPLETE) { - call->abort_code = abort_code; - call->error = error; - call->completion = compl; - call->state = RXRPC_CALL_COMPLETE; - trace_rxrpc_call_complete(call); - wake_up(&call->waitq); - rxrpc_notify_socket(call); - return true; - } - return false; -} - -bool rxrpc_set_call_completion(struct rxrpc_call *call, - enum rxrpc_call_completion compl, - u32 abort_code, - int error) -{ - bool ret = false; - - if (call->state < RXRPC_CALL_COMPLETE) { - write_lock(&call->state_lock); - ret = __rxrpc_set_call_completion(call, compl, abort_code, error); - write_unlock(&call->state_lock); - } - return ret; -} - -/* - * Record that a call successfully completed. - */ -bool __rxrpc_call_completed(struct rxrpc_call *call) -{ - return __rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0); -} - -bool rxrpc_call_completed(struct rxrpc_call *call) -{ - bool ret = false; - - if (call->state < RXRPC_CALL_COMPLETE) { - write_lock(&call->state_lock); - ret = __rxrpc_call_completed(call); - write_unlock(&call->state_lock); - } - return ret; -} - -/* - * Record that a call is locally aborted. - */ -bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call, - rxrpc_seq_t seq, u32 abort_code, int error) -{ - trace_rxrpc_abort(call->debug_id, why, call->cid, call->call_id, seq, - abort_code, error); - return __rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED, - abort_code, error); -} - -bool rxrpc_abort_call(const char *why, struct rxrpc_call *call, - rxrpc_seq_t seq, u32 abort_code, int error) -{ - bool ret; - - write_lock(&call->state_lock); - ret = __rxrpc_abort_call(why, call, seq, abort_code, error); - write_unlock(&call->state_lock); - return ret; -} - -/* * Pass a call terminating message to userspace. 
*/ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg) @@ -168,7 +89,7 @@ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg) ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4, &tmp); break; default: - pr_err("Invalid terminal call state %u\n", call->state); + pr_err("Invalid terminal call state %u\n", call->completion); BUG(); break; } @@ -180,41 +101,6 @@ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg) } /* - * End the packet reception phase. - */ -static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) -{ - rxrpc_seq_t whigh = READ_ONCE(call->rx_highest_seq); - - _enter("%d,%s", call->debug_id, rxrpc_call_states[call->state]); - - trace_rxrpc_receive(call, rxrpc_receive_end, 0, whigh); - - if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) - rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_terminal_ack); - - write_lock(&call->state_lock); - - switch (call->state) { - case RXRPC_CALL_CLIENT_RECV_REPLY: - __rxrpc_call_completed(call); - write_unlock(&call->state_lock); - break; - - case RXRPC_CALL_SERVER_RECV_REQUEST: - call->state = RXRPC_CALL_SERVER_ACK_REQUEST; - call->expect_req_by = jiffies + MAX_JIFFY_OFFSET; - write_unlock(&call->state_lock); - rxrpc_propose_delay_ACK(call, serial, - rxrpc_propose_ack_processing_op); - break; - default: - write_unlock(&call->state_lock); - break; - } -} - -/* * Discard a packet we've used up and advance the Rx window by one. */ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) @@ -244,10 +130,9 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) trace_rxrpc_receive(call, last ? rxrpc_receive_rotate_last : rxrpc_receive_rotate, serial, call->rx_consumed); - if (last) { - rxrpc_end_rx_phase(call, serial); - return; - } + + if (last) + set_bit(RXRPC_CALL_RECVMSG_READ_ALL, &call->flags); /* Check to see if there's an ACK that needs sending. 
*/ acked = atomic_add_return(call->rx_consumed - old_consumed, @@ -272,7 +157,8 @@ static int rxrpc_verify_data(struct rxrpc_call *call, struct sk_buff *skb) /* * Deliver messages to a call. This keeps processing packets until the buffer * is filled and we find either more DATA (returns 0) or the end of the DATA - * (returns 1). If more packets are required, it returns -EAGAIN. + * (returns 1). If more packets are required, it returns -EAGAIN and if the + * call has failed it returns -EIO. */ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, struct msghdr *msg, struct iov_iter *iter, @@ -288,7 +174,13 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, rx_pkt_offset = call->rx_pkt_offset; rx_pkt_len = call->rx_pkt_len; - if (call->state >= RXRPC_CALL_SERVER_ACK_REQUEST) { + if (rxrpc_call_has_failed(call)) { + seq = lower_32_bits(atomic64_read(&call->ackr_window)) - 1; + ret = -EIO; + goto done; + } + + if (test_bit(RXRPC_CALL_RECVMSG_READ_ALL, &call->flags)) { seq = lower_32_bits(atomic64_read(&call->ackr_window)) - 1; ret = 1; goto done; @@ -312,14 +204,15 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, if (rx_pkt_offset == 0) { ret2 = rxrpc_verify_data(call, skb); - rx_pkt_offset = sp->offset; - rx_pkt_len = sp->len; trace_rxrpc_recvdata(call, rxrpc_recvmsg_next, seq, - rx_pkt_offset, rx_pkt_len, ret2); + sp->offset, sp->len, ret2); if (ret2 < 0) { + kdebug("verify = %d", ret2); ret = ret2; goto out; } + rx_pkt_offset = sp->offset; + rx_pkt_len = sp->len; } else { trace_rxrpc_recvdata(call, rxrpc_recvmsg_cont, seq, rx_pkt_offset, rx_pkt_len, 0); @@ -494,36 +387,36 @@ try_again: msg->msg_namelen = len; } - switch (READ_ONCE(call->state)) { - case RXRPC_CALL_CLIENT_RECV_REPLY: - case RXRPC_CALL_SERVER_RECV_REQUEST: - case RXRPC_CALL_SERVER_ACK_REQUEST: - ret = rxrpc_recvmsg_data(sock, call, msg, &msg->msg_iter, len, - flags, &copied); - if (ret == -EAGAIN) - ret = 0; - - if 
(!skb_queue_empty(&call->recvmsg_queue)) - rxrpc_notify_socket(call); - break; - default: + ret = rxrpc_recvmsg_data(sock, call, msg, &msg->msg_iter, len, + flags, &copied); + if (ret == -EAGAIN) ret = 0; - break; - } - + if (ret == -EIO) + goto call_failed; if (ret < 0) goto error_unlock_call; - if (call->state == RXRPC_CALL_COMPLETE) { - ret = rxrpc_recvmsg_term(call, msg); - if (ret < 0) - goto error_unlock_call; - if (!(flags & MSG_PEEK)) - rxrpc_release_call(rx, call); - msg->msg_flags |= MSG_EOR; - ret = 1; - } + if (rxrpc_call_is_complete(call) && + skb_queue_empty(&call->recvmsg_queue)) + goto call_complete; + if (rxrpc_call_has_failed(call)) + goto call_failed; + rxrpc_notify_socket(call); + goto not_yet_complete; + +call_failed: + rxrpc_purge_queue(&call->recvmsg_queue); +call_complete: + ret = rxrpc_recvmsg_term(call, msg); + if (ret < 0) + goto error_unlock_call; + if (!(flags & MSG_PEEK)) + rxrpc_release_call(rx, call); + msg->msg_flags |= MSG_EOR; + ret = 1; + +not_yet_complete: if (ret == 0) msg->msg_flags |= MSG_MORE; else @@ -586,49 +479,34 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, size_t offset = 0; int ret; - _enter("{%d,%s},%zu,%d", - call->debug_id, rxrpc_call_states[call->state], - *_len, want_more); - - ASSERTCMP(call->state, !=, RXRPC_CALL_SERVER_SECURING); + _enter("{%d},%zu,%d", call->debug_id, *_len, want_more); mutex_lock(&call->user_mutex); - switch (READ_ONCE(call->state)) { - case RXRPC_CALL_CLIENT_RECV_REPLY: - case RXRPC_CALL_SERVER_RECV_REQUEST: - case RXRPC_CALL_SERVER_ACK_REQUEST: - ret = rxrpc_recvmsg_data(sock, call, NULL, iter, - *_len, 0, &offset); - *_len -= offset; - if (ret < 0) - goto out; - - /* We can only reach here with a partially full buffer if we - * have reached the end of the data. We must otherwise have a - * full buffer or have been given -EAGAIN. 
- */ - if (ret == 1) { - if (iov_iter_count(iter) > 0) - goto short_data; - if (!want_more) - goto read_phase_complete; - ret = 0; - goto out; - } - - if (!want_more) - goto excess_data; + ret = rxrpc_recvmsg_data(sock, call, NULL, iter, *_len, 0, &offset); + *_len -= offset; + if (ret == -EIO) + goto call_failed; + if (ret < 0) goto out; - case RXRPC_CALL_COMPLETE: - goto call_complete; - - default: - ret = -EINPROGRESS; + /* We can only reach here with a partially full buffer if we have + * reached the end of the data. We must otherwise have a full buffer + * or have been given -EAGAIN. + */ + if (ret == 1) { + if (iov_iter_count(iter) > 0) + goto short_data; + if (!want_more) + goto read_phase_complete; + ret = 0; goto out; } + if (!want_more) + goto excess_data; + goto out; + read_phase_complete: ret = 1; out: @@ -639,14 +517,18 @@ out: return ret; short_data: - trace_rxrpc_rx_eproto(call, 0, tracepoint_string("short_data")); + trace_rxrpc_abort(call->debug_id, rxrpc_recvmsg_short_data, + call->cid, call->call_id, call->rx_consumed, + 0, -EBADMSG); ret = -EBADMSG; goto out; excess_data: - trace_rxrpc_rx_eproto(call, 0, tracepoint_string("excess_data")); + trace_rxrpc_abort(call->debug_id, rxrpc_recvmsg_excess_data, + call->cid, call->call_id, call->rx_consumed, + 0, -EMSGSIZE); ret = -EMSGSIZE; goto out; -call_complete: +call_failed: *_abort = call->abort_code; ret = call->error; if (call->completion == RXRPC_CALL_SUCCEEDED) { diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index d1233720e05f..1bf571a66e02 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -411,18 +411,15 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt iv; struct scatterlist sg[16]; - bool aborted; u32 data_size, buf; u16 check; int ret; _enter(""); - if (sp->len < 8) { - aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_hdr", "V1H", - RXKADSEALEDINCON); - goto protocol_error; - } + if 
(sp->len < 8) + return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, + rxkad_abort_1_short_header); /* Decrypt the skbuff in-place. TODO: We really want to decrypt * directly into the target buffer. @@ -442,11 +439,9 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, skcipher_request_zero(req); /* Extract the decrypted packet length */ - if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) { - aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_len", "XV1", - RXKADDATALEN); - goto protocol_error; - } + if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) + return rxrpc_abort_eproto(call, skb, RXKADDATALEN, + rxkad_abort_1_short_encdata); sp->offset += sizeof(sechdr); sp->len -= sizeof(sechdr); @@ -456,26 +451,16 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, check = buf >> 16; check ^= seq ^ call->call_id; check &= 0xffff; - if (check != 0) { - aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_check", "V1C", - RXKADSEALEDINCON); - goto protocol_error; - } - - if (data_size > sp->len) { - aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_datalen", "V1L", - RXKADDATALEN); - goto protocol_error; - } + if (check != 0) + return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, + rxkad_abort_1_short_check); + if (data_size > sp->len) + return rxrpc_abort_eproto(call, skb, RXKADDATALEN, + rxkad_abort_1_short_data); sp->len = data_size; _leave(" = 0 [dlen=%x]", data_size); return 0; - -protocol_error: - if (aborted) - rxrpc_send_abort_packet(call); - return -EPROTO; } /* @@ -490,18 +475,15 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt iv; struct scatterlist _sg[4], *sg; - bool aborted; u32 data_size, buf; u16 check; int nsg, ret; _enter(",{%d}", sp->len); - if (sp->len < 8) { - aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_hdr", "V2H", - RXKADSEALEDINCON); - goto protocol_error; - } + if 
(sp->len < 8) + return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, + rxkad_abort_2_short_header); /* Decrypt the skbuff in-place. TODO: We really want to decrypt * directly into the target buffer. @@ -513,7 +495,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, } else { sg = kmalloc_array(nsg, sizeof(*sg), GFP_NOIO); if (!sg) - goto nomem; + return -ENOMEM; } sg_init_table(sg, nsg); @@ -537,11 +519,9 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, kfree(sg); /* Extract the decrypted packet length */ - if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) { - aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_len", "XV2", - RXKADDATALEN); - goto protocol_error; - } + if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) + return rxrpc_abort_eproto(call, skb, RXKADDATALEN, + rxkad_abort_2_short_len); sp->offset += sizeof(sechdr); sp->len -= sizeof(sechdr); @@ -551,30 +531,17 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, check = buf >> 16; check ^= seq ^ call->call_id; check &= 0xffff; - if (check != 0) { - aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_check", "V2C", - RXKADSEALEDINCON); - goto protocol_error; - } + if (check != 0) + return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, + rxkad_abort_2_short_check); - if (data_size > sp->len) { - aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_datalen", "V2L", - RXKADDATALEN); - goto protocol_error; - } + if (data_size > sp->len) + return rxrpc_abort_eproto(call, skb, RXKADDATALEN, + rxkad_abort_2_short_data); sp->len = data_size; _leave(" = 0 [dlen=%x]", data_size); return 0; - -protocol_error: - if (aborted) - rxrpc_send_abort_packet(call); - return -EPROTO; - -nomem: - _leave(" = -ENOMEM"); - return -ENOMEM; } /* @@ -590,7 +557,6 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb) __be32 buf[2]; } crypto __aligned(8); rxrpc_seq_t seq = sp->hdr.seq; - bool 
aborted; int ret; u16 cksum; u32 x, y; @@ -627,9 +593,9 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb) cksum = 1; /* zero checksums are not permitted */ if (cksum != sp->hdr.cksum) { - aborted = rxrpc_abort_eproto(call, skb, "rxkad_csum", "VCK", - RXKADSEALEDINCON); - goto protocol_error; + ret = rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, + rxkad_abort_bad_checksum); + goto out; } switch (call->conn->security_level) { @@ -647,13 +613,9 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb) break; } +out: skcipher_request_free(req); return ret; - -protocol_error: - if (aborted) - rxrpc_send_abort_packet(call); - return -EPROTO; } /* @@ -821,34 +783,30 @@ static int rxkad_encrypt_response(struct rxrpc_connection *conn, * respond to a challenge packet */ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, - struct sk_buff *skb, - u32 *_abort_code) + struct sk_buff *skb) { const struct rxrpc_key_token *token; struct rxkad_challenge challenge; struct rxkad_response *resp; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - const char *eproto; - u32 version, nonce, min_level, abort_code; - int ret; + u32 version, nonce, min_level; + int ret = -EPROTO; _enter("{%d,%x}", conn->debug_id, key_serial(conn->key)); - eproto = tracepoint_string("chall_no_key"); - abort_code = RX_PROTOCOL_ERROR; if (!conn->key) - goto protocol_error; + return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO, + rxkad_abort_chall_no_key); - abort_code = RXKADEXPIRED; ret = key_validate(conn->key); if (ret < 0) - goto other_error; + return rxrpc_abort_conn(conn, skb, RXKADEXPIRED, ret, + rxkad_abort_chall_key_expired); - eproto = tracepoint_string("chall_short"); - abort_code = RXKADPACKETSHORT; if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), &challenge, sizeof(challenge)) < 0) - goto protocol_error; + return rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO, + rxkad_abort_chall_short); version = 
ntohl(challenge.version); nonce = ntohl(challenge.nonce); @@ -856,15 +814,13 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, trace_rxrpc_rx_challenge(conn, sp->hdr.serial, version, nonce, min_level); - eproto = tracepoint_string("chall_ver"); - abort_code = RXKADINCONSISTENCY; if (version != RXKAD_VERSION) - goto protocol_error; + return rxrpc_abort_conn(conn, skb, RXKADINCONSISTENCY, -EPROTO, + rxkad_abort_chall_version); - abort_code = RXKADLEVELFAIL; - ret = -EACCES; if (conn->security_level < min_level) - goto other_error; + return rxrpc_abort_conn(conn, skb, RXKADLEVELFAIL, -EACCES, + rxkad_abort_chall_level); token = conn->key->payload.data[0]; @@ -893,13 +849,6 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, ret = rxkad_send_response(conn, &sp->hdr, resp, token->kad); kfree(resp); return ret; - -protocol_error: - trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto); - ret = -EPROTO; -other_error: - *_abort_code = abort_code; - return ret; } /* @@ -910,20 +859,15 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, struct sk_buff *skb, void *ticket, size_t ticket_len, struct rxrpc_crypt *_session_key, - time64_t *_expiry, - u32 *_abort_code) + time64_t *_expiry) { struct skcipher_request *req; - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt iv, key; struct scatterlist sg[1]; struct in_addr addr; unsigned int life; - const char *eproto; time64_t issue, now; bool little_endian; - int ret; - u32 abort_code; u8 *p, *q, *name, *end; _enter("{%d},{%x}", conn->debug_id, key_serial(server_key)); @@ -935,10 +879,9 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, memcpy(&iv, &server_key->payload.data[2], sizeof(iv)); - ret = -ENOMEM; req = skcipher_request_alloc(server_key->payload.data[0], GFP_NOFS); if (!req) - goto temporary_error; + return -ENOMEM; sg_init_one(&sg[0], ticket, ticket_len); skcipher_request_set_callback(req, 0, NULL, NULL); @@ -949,18 +892,21 @@ static int 
rxkad_decrypt_ticket(struct rxrpc_connection *conn, p = ticket; end = p + ticket_len; -#define Z(field) \ - ({ \ - u8 *__str = p; \ - eproto = tracepoint_string("rxkad_bad_"#field); \ - q = memchr(p, 0, end - p); \ - if (!q || q - p > (field##_SZ)) \ - goto bad_ticket; \ - for (; p < q; p++) \ - if (!isprint(*p)) \ - goto bad_ticket; \ - p++; \ - __str; \ +#define Z(field, fieldl) \ + ({ \ + u8 *__str = p; \ + q = memchr(p, 0, end - p); \ + if (!q || q - p > field##_SZ) \ + return rxrpc_abort_conn( \ + conn, skb, RXKADBADTICKET, -EPROTO, \ + rxkad_abort_resp_tkt_##fieldl); \ + for (; p < q; p++) \ + if (!isprint(*p)) \ + return rxrpc_abort_conn( \ + conn, skb, RXKADBADTICKET, -EPROTO, \ + rxkad_abort_resp_tkt_##fieldl); \ + p++; \ + __str; \ }) /* extract the ticket flags */ @@ -969,20 +915,20 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, p++; /* extract the authentication name */ - name = Z(ANAME); + name = Z(ANAME, aname); _debug("KIV ANAME: %s", name); /* extract the principal's instance */ - name = Z(INST); + name = Z(INST, inst); _debug("KIV INST : %s", name); /* extract the principal's authentication domain */ - name = Z(REALM); + name = Z(REALM, realm); _debug("KIV REALM: %s", name); - eproto = tracepoint_string("rxkad_bad_len"); if (end - p < 4 + 8 + 4 + 2) - goto bad_ticket; + return rxrpc_abort_conn(conn, skb, RXKADBADTICKET, -EPROTO, + rxkad_abort_resp_tkt_short); /* get the IPv4 address of the entity that requested the ticket */ memcpy(&addr, p, sizeof(addr)); @@ -1014,38 +960,23 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, _debug("KIV ISSUE: %llx [%llx]", issue, now); /* check the ticket is in date */ - if (issue > now) { - abort_code = RXKADNOAUTH; - ret = -EKEYREJECTED; - goto other_error; - } - - if (issue < now - life) { - abort_code = RXKADEXPIRED; - ret = -EKEYEXPIRED; - goto other_error; - } + if (issue > now) + return rxrpc_abort_conn(conn, skb, RXKADNOAUTH, -EKEYREJECTED, + rxkad_abort_resp_tkt_future); 
+ if (issue < now - life) + return rxrpc_abort_conn(conn, skb, RXKADEXPIRED, -EKEYEXPIRED, + rxkad_abort_resp_tkt_expired); *_expiry = issue + life; /* get the service name */ - name = Z(SNAME); + name = Z(SNAME, sname); _debug("KIV SNAME: %s", name); /* get the service instance name */ - name = Z(INST); + name = Z(INST, sinst); _debug("KIV SINST: %s", name); return 0; - -bad_ticket: - trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto); - abort_code = RXKADBADTICKET; - ret = -EPROTO; -other_error: - *_abort_code = abort_code; - return ret; -temporary_error: - return ret; } /* @@ -1086,17 +1017,15 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn, * verify a response */ static int rxkad_verify_response(struct rxrpc_connection *conn, - struct sk_buff *skb, - u32 *_abort_code) + struct sk_buff *skb) { struct rxkad_response *response; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt session_key; struct key *server_key; - const char *eproto; time64_t expiry; void *ticket; - u32 abort_code, version, kvno, ticket_len, level; + u32 version, kvno, ticket_len, level; __be32 csum; int ret, i; @@ -1104,22 +1033,18 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, server_key = rxrpc_look_up_server_security(conn, skb, 0, 0); if (IS_ERR(server_key)) { - switch (PTR_ERR(server_key)) { + ret = PTR_ERR(server_key); + switch (ret) { case -ENOKEY: - abort_code = RXKADUNKNOWNKEY; - break; + return rxrpc_abort_conn(conn, skb, RXKADUNKNOWNKEY, ret, + rxkad_abort_resp_nokey); case -EKEYEXPIRED: - abort_code = RXKADEXPIRED; - break; + return rxrpc_abort_conn(conn, skb, RXKADEXPIRED, ret, + rxkad_abort_resp_key_expired); default: - abort_code = RXKADNOAUTH; - break; + return rxrpc_abort_conn(conn, skb, RXKADNOAUTH, ret, + rxkad_abort_resp_key_rejected); } - trace_rxrpc_abort(0, "SVK", - sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - abort_code, PTR_ERR(server_key)); - *_abort_code = abort_code; - return -EPROTO; } ret = -ENOMEM; @@ -1127,11 
+1052,12 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, if (!response) goto temporary_error; - eproto = tracepoint_string("rxkad_rsp_short"); - abort_code = RXKADPACKETSHORT; if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), - response, sizeof(*response)) < 0) + response, sizeof(*response)) < 0) { + rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO, + rxkad_abort_resp_short); goto protocol_error; + } version = ntohl(response->version); ticket_len = ntohl(response->ticket_len); @@ -1139,20 +1065,23 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, trace_rxrpc_rx_response(conn, sp->hdr.serial, version, kvno, ticket_len); - eproto = tracepoint_string("rxkad_rsp_ver"); - abort_code = RXKADINCONSISTENCY; - if (version != RXKAD_VERSION) + if (version != RXKAD_VERSION) { + rxrpc_abort_conn(conn, skb, RXKADINCONSISTENCY, -EPROTO, + rxkad_abort_resp_version); goto protocol_error; + } - eproto = tracepoint_string("rxkad_rsp_tktlen"); - abort_code = RXKADTICKETLEN; - if (ticket_len < 4 || ticket_len > MAXKRB5TICKETLEN) + if (ticket_len < 4 || ticket_len > MAXKRB5TICKETLEN) { + rxrpc_abort_conn(conn, skb, RXKADTICKETLEN, -EPROTO, + rxkad_abort_resp_tkt_len); goto protocol_error; + } - eproto = tracepoint_string("rxkad_rsp_unkkey"); - abort_code = RXKADUNKNOWNKEY; - if (kvno >= RXKAD_TKT_TYPE_KERBEROS_V5) + if (kvno >= RXKAD_TKT_TYPE_KERBEROS_V5) { + rxrpc_abort_conn(conn, skb, RXKADUNKNOWNKEY, -EPROTO, + rxkad_abort_resp_unknown_tkt); goto protocol_error; + } /* extract the kerberos ticket and decrypt and decode it */ ret = -ENOMEM; @@ -1160,15 +1089,15 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, if (!ticket) goto temporary_error_free_resp; - eproto = tracepoint_string("rxkad_tkt_short"); - abort_code = RXKADPACKETSHORT; - ret = skb_copy_bits(skb, sizeof(struct rxrpc_wire_header) + sizeof(*response), - ticket, ticket_len); - if (ret < 0) - goto temporary_error_free_ticket; + if (skb_copy_bits(skb, 
sizeof(struct rxrpc_wire_header) + sizeof(*response), + ticket, ticket_len) < 0) { + rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO, + rxkad_abort_resp_short_tkt); + goto protocol_error; + } ret = rxkad_decrypt_ticket(conn, server_key, skb, ticket, ticket_len, - &session_key, &expiry, _abort_code); + &session_key, &expiry); if (ret < 0) goto temporary_error_free_ticket; @@ -1176,56 +1105,61 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, * response */ rxkad_decrypt_response(conn, response, &session_key); - eproto = tracepoint_string("rxkad_rsp_param"); - abort_code = RXKADSEALEDINCON; - if (ntohl(response->encrypted.epoch) != conn->proto.epoch) - goto protocol_error_free; - if (ntohl(response->encrypted.cid) != conn->proto.cid) - goto protocol_error_free; - if (ntohl(response->encrypted.securityIndex) != conn->security_ix) + if (ntohl(response->encrypted.epoch) != conn->proto.epoch || + ntohl(response->encrypted.cid) != conn->proto.cid || + ntohl(response->encrypted.securityIndex) != conn->security_ix) { + rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, + rxkad_abort_resp_bad_param); goto protocol_error_free; + } + csum = response->encrypted.checksum; response->encrypted.checksum = 0; rxkad_calc_response_checksum(response); - eproto = tracepoint_string("rxkad_rsp_csum"); - if (response->encrypted.checksum != csum) + if (response->encrypted.checksum != csum) { + rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, + rxkad_abort_resp_bad_checksum); goto protocol_error_free; + } - spin_lock(&conn->bundle->channel_lock); for (i = 0; i < RXRPC_MAXCALLS; i++) { - struct rxrpc_call *call; u32 call_id = ntohl(response->encrypted.call_id[i]); + u32 counter = READ_ONCE(conn->channels[i].call_counter); + + if (call_id > INT_MAX) { + rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, + rxkad_abort_resp_bad_callid); + goto protocol_error_free; + } - eproto = tracepoint_string("rxkad_rsp_callid"); - if (call_id > INT_MAX) - goto 
protocol_error_unlock; - - eproto = tracepoint_string("rxkad_rsp_callctr"); - if (call_id < conn->channels[i].call_counter) - goto protocol_error_unlock; - - eproto = tracepoint_string("rxkad_rsp_callst"); - if (call_id > conn->channels[i].call_counter) { - call = rcu_dereference_protected( - conn->channels[i].call, - lockdep_is_held(&conn->bundle->channel_lock)); - if (call && call->state < RXRPC_CALL_COMPLETE) - goto protocol_error_unlock; + if (call_id < counter) { + rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, + rxkad_abort_resp_call_ctr); + goto protocol_error_free; + } + + if (call_id > counter) { + if (conn->channels[i].call) { + rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, + rxkad_abort_resp_call_state); + goto protocol_error_free; + } conn->channels[i].call_counter = call_id; } } - spin_unlock(&conn->bundle->channel_lock); - eproto = tracepoint_string("rxkad_rsp_seq"); - abort_code = RXKADOUTOFSEQUENCE; - if (ntohl(response->encrypted.inc_nonce) != conn->rxkad.nonce + 1) + if (ntohl(response->encrypted.inc_nonce) != conn->rxkad.nonce + 1) { + rxrpc_abort_conn(conn, skb, RXKADOUTOFSEQUENCE, -EPROTO, + rxkad_abort_resp_ooseq); goto protocol_error_free; + } - eproto = tracepoint_string("rxkad_rsp_level"); - abort_code = RXKADLEVELFAIL; level = ntohl(response->encrypted.level); - if (level > RXRPC_SECURITY_ENCRYPT) + if (level > RXRPC_SECURITY_ENCRYPT) { + rxrpc_abort_conn(conn, skb, RXKADLEVELFAIL, -EPROTO, + rxkad_abort_resp_level); goto protocol_error_free; + } conn->security_level = level; /* create a key to hold the security data and expiration time - after @@ -1240,15 +1174,11 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, _leave(" = 0"); return 0; -protocol_error_unlock: - spin_unlock(&conn->bundle->channel_lock); protocol_error_free: kfree(ticket); protocol_error: kfree(response); - trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto); key_put(server_key); - *_abort_code = abort_code; return -EPROTO; 
temporary_error_free_ticket: diff --git a/net/rxrpc/rxperf.c b/net/rxrpc/rxperf.c index d33a109e846c..16dcabb71ebe 100644 --- a/net/rxrpc/rxperf.c +++ b/net/rxrpc/rxperf.c @@ -10,6 +10,8 @@ #include <linux/slab.h> #include <net/sock.h> #include <net/af_rxrpc.h> +#define RXRPC_TRACE_ONLY_DEFINE_ENUMS +#include <trace/events/rxrpc.h> MODULE_DESCRIPTION("rxperf test server (afs)"); MODULE_AUTHOR("Red Hat, Inc."); @@ -307,12 +309,14 @@ static void rxperf_deliver_to_call(struct work_struct *work) case -EOPNOTSUPP: abort_code = RXGEN_OPCODE; rxrpc_kernel_abort_call(rxperf_socket, call->rxcall, - abort_code, ret, "GOP"); + abort_code, ret, + rxperf_abort_op_not_supported); goto call_complete; case -ENOTSUPP: abort_code = RX_USER_ABORT; rxrpc_kernel_abort_call(rxperf_socket, call->rxcall, - abort_code, ret, "GUA"); + abort_code, ret, + rxperf_abort_op_not_supported); goto call_complete; case -EIO: pr_err("Call %u in bad state %u\n", @@ -324,11 +328,13 @@ static void rxperf_deliver_to_call(struct work_struct *work) case -ENOMEM: case -EFAULT: rxrpc_kernel_abort_call(rxperf_socket, call->rxcall, - RXGEN_SS_UNMARSHAL, ret, "GUM"); + RXGEN_SS_UNMARSHAL, ret, + rxperf_abort_unmarshal_error); goto call_complete; default: rxrpc_kernel_abort_call(rxperf_socket, call->rxcall, - RX_CALL_DEAD, ret, "GER"); + RX_CALL_DEAD, ret, + rxperf_abort_general_error); goto call_complete; } } @@ -523,7 +529,8 @@ static int rxperf_process_call(struct rxperf_call *call) if (n == -ENOMEM) rxrpc_kernel_abort_call(rxperf_socket, call->rxcall, - RXGEN_SS_MARSHAL, -ENOMEM, "GOM"); + RXGEN_SS_MARSHAL, -ENOMEM, + rxperf_abort_oom); return n; } diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c index ab968f65a490..cb8dd1d3b1d4 100644 --- a/net/rxrpc/security.c +++ b/net/rxrpc/security.c @@ -97,38 +97,31 @@ found: */ int rxrpc_init_client_conn_security(struct rxrpc_connection *conn) { - const struct rxrpc_security *sec; struct rxrpc_key_token *token; struct key *key = conn->key; - int ret; + int ret 
= 0; _enter("{%d},{%x}", conn->debug_id, key_serial(key)); - if (!key) - return 0; - - ret = key_validate(key); - if (ret < 0) - return ret; - for (token = key->payload.data[0]; token; token = token->next) { - sec = rxrpc_security_lookup(token->security_index); - if (sec) + if (token->security_index == conn->security->security_index) goto found; } return -EKEYREJECTED; found: - conn->security = sec; - - ret = conn->security->init_connection_security(conn, token); - if (ret < 0) { - conn->security = &rxrpc_no_security; - return ret; + mutex_lock(&conn->security_lock); + if (conn->state == RXRPC_CONN_CLIENT_UNSECURED) { + ret = conn->security->init_connection_security(conn, token); + if (ret == 0) { + spin_lock(&conn->state_lock); + if (conn->state == RXRPC_CONN_CLIENT_UNSECURED) + conn->state = RXRPC_CONN_CLIENT; + spin_unlock(&conn->state_lock); + } } - - _leave(" = 0"); - return 0; + mutex_unlock(&conn->security_lock); + return ret; } /* @@ -144,21 +137,15 @@ const struct rxrpc_security *rxrpc_get_incoming_security(struct rxrpc_sock *rx, sec = rxrpc_security_lookup(sp->hdr.securityIndex); if (!sec) { - trace_rxrpc_abort(0, "SVS", - sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - RX_INVALID_OPERATION, EKEYREJECTED); - skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; - skb->priority = RX_INVALID_OPERATION; + rxrpc_direct_abort(skb, rxrpc_abort_unsupported_security, + RX_INVALID_OPERATION, -EKEYREJECTED); return NULL; } if (sp->hdr.securityIndex != RXRPC_SECURITY_NONE && !rx->securities) { - trace_rxrpc_abort(0, "SVR", - sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - RX_INVALID_OPERATION, EKEYREJECTED); - skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; - skb->priority = sec->no_key_abort; + rxrpc_direct_abort(skb, rxrpc_abort_no_service_key, + sec->no_key_abort, -EKEYREJECTED); return NULL; } @@ -191,9 +178,9 @@ struct key *rxrpc_look_up_server_security(struct rxrpc_connection *conn, sprintf(kdesc, "%u:%u", sp->hdr.serviceId, sp->hdr.securityIndex); - rcu_read_lock(); + 
read_lock(&conn->local->services_lock); - rx = rcu_dereference(conn->local->service); + rx = conn->local->service; if (!rx) goto out; @@ -215,6 +202,6 @@ struct key *rxrpc_look_up_server_security(struct rxrpc_connection *conn, } out: - rcu_read_unlock(); + read_unlock(&conn->local->services_lock); return key; } diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index cde1e65f16b4..da49fcf1c456 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -18,6 +18,81 @@ #include "ar-internal.h" /* + * Propose an abort to be made in the I/O thread. + */ +bool rxrpc_propose_abort(struct rxrpc_call *call, s32 abort_code, int error, + enum rxrpc_abort_reason why) +{ + _enter("{%d},%d,%d,%u", call->debug_id, abort_code, error, why); + + if (!call->send_abort && !rxrpc_call_is_complete(call)) { + call->send_abort_why = why; + call->send_abort_err = error; + call->send_abort_seq = 0; + /* Request abort locklessly vs rxrpc_input_call_event(). */ + smp_store_release(&call->send_abort, abort_code); + rxrpc_poke_call(call, rxrpc_call_poke_abort); + return true; + } + + return false; +} + +/* + * Wait for a call to become connected. Interruption here doesn't cause the + * call to be aborted. 
+ */ +static int rxrpc_wait_to_be_connected(struct rxrpc_call *call, long *timeo) +{ + DECLARE_WAITQUEUE(myself, current); + int ret = 0; + + _enter("%d", call->debug_id); + + if (rxrpc_call_state(call) != RXRPC_CALL_CLIENT_AWAIT_CONN) + return call->error; + + add_wait_queue_exclusive(&call->waitq, &myself); + + for (;;) { + ret = call->error; + if (ret < 0) + break; + + switch (call->interruptibility) { + case RXRPC_INTERRUPTIBLE: + case RXRPC_PREINTERRUPTIBLE: + set_current_state(TASK_INTERRUPTIBLE); + break; + case RXRPC_UNINTERRUPTIBLE: + default: + set_current_state(TASK_UNINTERRUPTIBLE); + break; + } + if (rxrpc_call_state(call) != RXRPC_CALL_CLIENT_AWAIT_CONN) { + ret = call->error; + break; + } + if ((call->interruptibility == RXRPC_INTERRUPTIBLE || + call->interruptibility == RXRPC_PREINTERRUPTIBLE) && + signal_pending(current)) { + ret = sock_intr_errno(*timeo); + break; + } + *timeo = schedule_timeout(*timeo); + } + + remove_wait_queue(&call->waitq, &myself); + __set_current_state(TASK_RUNNING); + + if (ret == 0 && rxrpc_call_is_complete(call)) + ret = call->error; + + _leave(" = %d", ret); + return ret; +} + +/* * Return true if there's sufficient Tx queue space. 
*/ static bool rxrpc_check_tx_space(struct rxrpc_call *call, rxrpc_seq_t *_tx_win) @@ -39,7 +114,7 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx, if (rxrpc_check_tx_space(call, NULL)) return 0; - if (call->state >= RXRPC_CALL_COMPLETE) + if (rxrpc_call_is_complete(call)) return call->error; if (signal_pending(current)) @@ -74,7 +149,7 @@ static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx, if (rxrpc_check_tx_space(call, &tx_win)) return 0; - if (call->state >= RXRPC_CALL_COMPLETE) + if (rxrpc_call_is_complete(call)) return call->error; if (timeout == 0 && @@ -103,7 +178,7 @@ static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx, if (rxrpc_check_tx_space(call, NULL)) return 0; - if (call->state >= RXRPC_CALL_COMPLETE) + if (rxrpc_call_is_complete(call)) return call->error; trace_rxrpc_txqueue(call, rxrpc_txqueue_wait); @@ -168,7 +243,6 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, struct rxrpc_txbuf *txb, rxrpc_notify_end_tx_t notify_end_tx) { - unsigned long now; rxrpc_seq_t seq = txb->seq; bool last = test_bit(RXRPC_TXBUF_LAST, &txb->flags), poke; @@ -191,36 +265,10 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, poke = list_empty(&call->tx_sendmsg); list_add_tail(&txb->call_link, &call->tx_sendmsg); call->tx_prepared = seq; + if (last) + rxrpc_notify_end_tx(rx, call, notify_end_tx); spin_unlock(&call->tx_lock); - if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) { - _debug("________awaiting reply/ACK__________"); - write_lock(&call->state_lock); - switch (call->state) { - case RXRPC_CALL_CLIENT_SEND_REQUEST: - call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY; - rxrpc_notify_end_tx(rx, call, notify_end_tx); - break; - case RXRPC_CALL_SERVER_ACK_REQUEST: - call->state = RXRPC_CALL_SERVER_SEND_REPLY; - now = jiffies; - WRITE_ONCE(call->delay_ack_at, now + MAX_JIFFY_OFFSET); - if (call->ackr_reason == RXRPC_ACK_DELAY) - call->ackr_reason = 0; - 
trace_rxrpc_timer(call, rxrpc_timer_init_for_send_reply, now); - if (!last) - break; - fallthrough; - case RXRPC_CALL_SERVER_SEND_REPLY: - call->state = RXRPC_CALL_SERVER_AWAIT_ACK; - rxrpc_notify_end_tx(rx, call, notify_end_tx); - break; - default: - break; - } - write_unlock(&call->state_lock); - } - if (poke) rxrpc_poke_call(call, rxrpc_call_poke_start); } @@ -245,6 +293,16 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); + ret = rxrpc_wait_to_be_connected(call, &timeo); + if (ret < 0) + return ret; + + if (call->conn->state == RXRPC_CONN_CLIENT_UNSECURED) { + ret = rxrpc_init_client_conn_security(call->conn); + if (ret < 0) + return ret; + } + /* this should be in poll */ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); @@ -252,15 +310,20 @@ reload: ret = -EPIPE; if (sk->sk_shutdown & SEND_SHUTDOWN) goto maybe_error; - state = READ_ONCE(call->state); + state = rxrpc_call_state(call); ret = -ESHUTDOWN; if (state >= RXRPC_CALL_COMPLETE) goto maybe_error; ret = -EPROTO; if (state != RXRPC_CALL_CLIENT_SEND_REQUEST && state != RXRPC_CALL_SERVER_ACK_REQUEST && - state != RXRPC_CALL_SERVER_SEND_REPLY) + state != RXRPC_CALL_SERVER_SEND_REPLY) { + /* Request phase complete for this client call */ + trace_rxrpc_abort(call->debug_id, rxrpc_sendmsg_late_send, + call->cid, call->call_id, call->rx_consumed, + 0, -EPROTO); goto maybe_error; + } ret = -EMSGSIZE; if (call->tx_total_len != -1) { @@ -329,7 +392,7 @@ reload: /* check for the far side aborting the call or a network error * occurring */ - if (call->state == RXRPC_CALL_COMPLETE) + if (rxrpc_call_is_complete(call)) goto call_terminated; /* add the packet to the send queue if it's now full */ @@ -354,12 +417,9 @@ reload: success: ret = copied; - if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE) { - read_lock(&call->state_lock); - if (call->error < 0) - ret = call->error; - read_unlock(&call->state_lock); - } + if (rxrpc_call_is_complete(call) && + call->error < 0) + 
ret = call->error; out: call->tx_pending = txb; _leave(" = %d", ret); @@ -543,7 +603,6 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, atomic_inc_return(&rxrpc_debug_id)); /* The socket is now unlocked */ - rxrpc_put_peer(cp.peer, rxrpc_peer_put_discard_tmp); _leave(" = %p\n", call); return call; } @@ -556,7 +615,6 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) __releases(&rx->sk.sk_lock.slock) { - enum rxrpc_call_state state; struct rxrpc_call *call; unsigned long now, j; bool dropped_lock = false; @@ -598,10 +656,10 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) return PTR_ERR(call); /* ... and we have the call lock. */ ret = 0; - if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE) + if (rxrpc_call_is_complete(call)) goto out_put_unlock; } else { - switch (READ_ONCE(call->state)) { + switch (rxrpc_call_state(call)) { case RXRPC_CALL_UNINITIALISED: case RXRPC_CALL_CLIENT_AWAIT_CONN: case RXRPC_CALL_SERVER_PREALLOC: @@ -655,17 +713,13 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) break; } - state = READ_ONCE(call->state); - _debug("CALL %d USR %lx ST %d on CONN %p", - call->debug_id, call->user_call_ID, state, call->conn); - - if (state >= RXRPC_CALL_COMPLETE) { + if (rxrpc_call_is_complete(call)) { /* it's too late for this call */ ret = -ESHUTDOWN; } else if (p.command == RXRPC_CMD_SEND_ABORT) { + rxrpc_propose_abort(call, p.abort_code, -ECONNABORTED, + rxrpc_abort_call_sendmsg); ret = 0; - if (rxrpc_abort_call("CMD", call, 0, p.abort_code, -ECONNABORTED)) - ret = rxrpc_send_abort_packet(call); } else if (p.command != RXRPC_CMD_SEND_DATA) { ret = -EINVAL; } else { @@ -705,34 +759,17 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, bool dropped_lock = false; int ret; - _enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]); 
+ _enter("{%d},", call->debug_id); ASSERTCMP(msg->msg_name, ==, NULL); ASSERTCMP(msg->msg_control, ==, NULL); mutex_lock(&call->user_mutex); - _debug("CALL %d USR %lx ST %d on CONN %p", - call->debug_id, call->user_call_ID, call->state, call->conn); - - switch (READ_ONCE(call->state)) { - case RXRPC_CALL_CLIENT_SEND_REQUEST: - case RXRPC_CALL_SERVER_ACK_REQUEST: - case RXRPC_CALL_SERVER_SEND_REPLY: - ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len, - notify_end_tx, &dropped_lock); - break; - case RXRPC_CALL_COMPLETE: - read_lock(&call->state_lock); + ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len, + notify_end_tx, &dropped_lock); + if (ret == -ESHUTDOWN) ret = call->error; - read_unlock(&call->state_lock); - break; - default: - /* Request phase complete for this client call */ - trace_rxrpc_rx_eproto(call, 0, tracepoint_string("late_send")); - ret = -EPROTO; - break; - } if (!dropped_lock) mutex_unlock(&call->user_mutex); @@ -747,24 +784,20 @@ EXPORT_SYMBOL(rxrpc_kernel_send_data); * @call: The call to be aborted * @abort_code: The abort code to stick into the ABORT packet * @error: Local error value - * @why: 3-char string indicating why. + * @why: Indication as to why. * * Allow a kernel service to abort a call, if it's still in an abortable state * and return true if the call was aborted, false if it was already complete. 
*/ bool rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, - u32 abort_code, int error, const char *why) + u32 abort_code, int error, enum rxrpc_abort_reason why) { bool aborted; - _enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why); + _enter("{%d},%d,%d,%u", call->debug_id, abort_code, error, why); mutex_lock(&call->user_mutex); - - aborted = rxrpc_abort_call(why, call, 0, abort_code, error); - if (aborted) - rxrpc_send_abort_packet(call); - + aborted = rxrpc_propose_abort(call, abort_code, error, why); mutex_unlock(&call->user_mutex); return aborted; } diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index ff47ce4d3968..6b26bdb999d7 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -134,6 +134,11 @@ static int valid_label(const struct nlattr *attr, { const u32 *label = nla_data(attr); + if (nla_len(attr) != sizeof(*label)) { + NL_SET_ERR_MSG_MOD(extack, "Invalid MPLS label length"); + return -EINVAL; + } + if (*label & ~MPLS_LABEL_MASK || *label == MPLS_LABEL_IMPLNULL) { NL_SET_ERR_MSG_MOD(extack, "MPLS label out of range"); return -EINVAL; @@ -145,7 +150,8 @@ static int valid_label(const struct nlattr *attr, static const struct nla_policy mpls_policy[TCA_MPLS_MAX + 1] = { [TCA_MPLS_PARMS] = NLA_POLICY_EXACT_LEN(sizeof(struct tc_mpls)), [TCA_MPLS_PROTO] = { .type = NLA_U16 }, - [TCA_MPLS_LABEL] = NLA_POLICY_VALIDATE_FN(NLA_U32, valid_label), + [TCA_MPLS_LABEL] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, + valid_label), [TCA_MPLS_TC] = NLA_POLICY_RANGE(NLA_U8, 0, 7), [TCA_MPLS_TTL] = NLA_POLICY_MIN(NLA_U8, 1), [TCA_MPLS_BOS] = NLA_POLICY_RANGE(NLA_U8, 0, 1), diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 2317db02c764..72d2c204d5f3 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1133,6 +1133,11 @@ skip: return -ENOENT; } + if (new && new->ops == &noqueue_qdisc_ops) { + NL_SET_ERR_MSG(extack, "Cannot assign noqueue to a class"); + return -EINVAL; + } + err = cops->graft(parent, cl, new, &old, 
extack); if (err) return err; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 7bb247c51e2f..2d7b1e03110a 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -302,7 +302,7 @@ __gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth list_for_each_entry(pos, &pipe->in_downcall, list) { if (!uid_eq(pos->uid, uid)) continue; - if (auth && pos->auth->service != auth->service) + if (pos->auth->service != auth->service) continue; refcount_inc(&pos->count); return pos; @@ -686,6 +686,21 @@ out: return err; } +static struct gss_upcall_msg * +gss_find_downcall(struct rpc_pipe *pipe, kuid_t uid) +{ + struct gss_upcall_msg *pos; + list_for_each_entry(pos, &pipe->in_downcall, list) { + if (!uid_eq(pos->uid, uid)) + continue; + if (!rpc_msg_is_inflight(&pos->msg)) + continue; + refcount_inc(&pos->count); + return pos; + } + return NULL; +} + #define MSG_BUF_MAXSIZE 1024 static ssize_t @@ -732,7 +747,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) err = -ENOENT; /* Find a matching upcall */ spin_lock(&pipe->lock); - gss_msg = __gss_find_upcall(pipe, uid, NULL); + gss_msg = gss_find_downcall(pipe, uid); if (gss_msg == NULL) { spin_unlock(&pipe->lock); goto err_put_ctx; diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 148bb0a7fa5b..acb822b23af1 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -923,7 +923,7 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g * rejecting the server-computed MIC in this somewhat rare case, * do not use splice with the GSS integrity service. */ - __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); + clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); /* Did we already verify the signature on the original pass through? 
*/ if (rqstp->rq_deferred) @@ -990,7 +990,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs int pad, remaining_len, offset; u32 rseqno; - __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); + clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); priv_len = svc_getnl(&buf->head[0]); if (rqstp->rq_deferred) { diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 85f0c3cfc877..f06622814a95 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1243,10 +1243,10 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) goto err_short_len; /* Will be turned off by GSS integrity and privacy services */ - __set_bit(RQ_SPLICE_OK, &rqstp->rq_flags); + set_bit(RQ_SPLICE_OK, &rqstp->rq_flags); /* Will be turned off only when NFSv4 Sessions are used */ - __set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); - __clear_bit(RQ_DROPME, &rqstp->rq_flags); + set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); + clear_bit(RQ_DROPME, &rqstp->rq_flags); svc_putu32(resv, rqstp->rq_xid); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 2106003645a7..c2ce12538008 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1238,7 +1238,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req) trace_svc_defer(rqstp); svc_xprt_get(rqstp->rq_xprt); dr->xprt = rqstp->rq_xprt; - __set_bit(RQ_DROPME, &rqstp->rq_flags); + set_bit(RQ_DROPME, &rqstp->rq_flags); dr->handle.revisit = svc_revisit; return &dr->handle; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 015714398007..815baf308236 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -298,9 +298,9 @@ static void svc_sock_setbufsize(struct svc_sock *svsk, unsigned int nreqs) static void svc_sock_secure_port(struct svc_rqst *rqstp) { if (svc_port_is_privileged(svc_addr(rqstp))) - __set_bit(RQ_SECURE, &rqstp->rq_flags); + set_bit(RQ_SECURE, &rqstp->rq_flags); else - __clear_bit(RQ_SECURE, &rqstp->rq_flags); + clear_bit(RQ_SECURE, &rqstp->rq_flags); } /* @@ 
-1008,9 +1008,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) rqstp->rq_xprt_ctxt = NULL; rqstp->rq_prot = IPPROTO_TCP; if (test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags)) - __set_bit(RQ_LOCAL, &rqstp->rq_flags); + set_bit(RQ_LOCAL, &rqstp->rq_flags); else - __clear_bit(RQ_LOCAL, &rqstp->rq_flags); + clear_bit(RQ_LOCAL, &rqstp->rq_flags); p = (__be32 *)rqstp->rq_arg.head[0].iov_base; calldir = p[1]; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 199fa012f18a..94b20fb47135 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -602,7 +602,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt) static void svc_rdma_secure_port(struct svc_rqst *rqstp) { - __set_bit(RQ_SECURE, &rqstp->rq_flags); + set_bit(RQ_SECURE, &rqstp->rq_flags); } static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt) diff --git a/net/tipc/node.c b/net/tipc/node.c index 49ddc484c4fe..5e000fde8067 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1179,8 +1179,9 @@ void tipc_node_check_dest(struct net *net, u32 addr, bool addr_match = false; bool sign_match = false; bool link_up = false; + bool link_is_reset = false; bool accept_addr = false; - bool reset = true; + bool reset = false; char *if_name; unsigned long intv; u16 session; @@ -1200,14 +1201,14 @@ void tipc_node_check_dest(struct net *net, u32 addr, /* Prepare to validate requesting node's signature and media address */ l = le->link; link_up = l && tipc_link_is_up(l); + link_is_reset = l && tipc_link_is_reset(l); addr_match = l && !memcmp(&le->maddr, maddr, sizeof(*maddr)); sign_match = (signature == n->signature); /* These three flags give us eight permutations: */ if (sign_match && addr_match && link_up) { - /* All is fine. Do nothing. */ - reset = false; + /* All is fine. Ignore requests. 
*/ /* Peer node is not a container/local namespace */ if (!n->peer_hash_mix) n->peer_hash_mix = hash_mixes; @@ -1232,6 +1233,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, */ accept_addr = true; *respond = true; + reset = true; } else if (!sign_match && addr_match && link_up) { /* Peer node rebooted. Two possibilities: * - Delayed re-discovery; this link endpoint has already @@ -1263,6 +1265,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, n->signature = signature; accept_addr = true; *respond = true; + reset = true; } if (!accept_addr) @@ -1291,6 +1294,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, tipc_link_fsm_evt(l, LINK_RESET_EVT); if (n->state == NODE_FAILINGOVER) tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); + link_is_reset = tipc_link_is_reset(l); le->link = l; n->link_cnt++; tipc_node_calculate_timer(n, l); @@ -1303,7 +1307,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, memcpy(&le->maddr, maddr, sizeof(*maddr)); exit: tipc_node_write_unlock(n); - if (reset && l && !tipc_link_is_reset(l)) + if (reset && !link_is_reset) tipc_node_link_down(n, b->identity, false); tipc_node_put(n); } diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index 0ee296cf520c..43343e13c542 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -44,6 +44,7 @@ modpost-args = \ $(if $(CONFIG_MODVERSIONS),-m) \ $(if $(CONFIG_MODULE_SRCVERSION_ALL),-a) \ $(if $(CONFIG_SECTION_MISMATCH_WARN_ONLY),,-E) \ + $(if $(KBUILD_MODPOST_WARN),-w) \ $(if $(KBUILD_NSDEPS),-d $(MODULES_NSDEPS)) \ $(if $(CONFIG_MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS)$(KBUILD_NSDEPS),-N) \ -o $@ @@ -55,10 +56,6 @@ ifneq ($(findstring i,$(filter-out --%,$(MAKEFLAGS))),) modpost-args += -n endif -ifneq ($(KBUILD_MODPOST_WARN)$(missing-input),) -modpost-args += -w -endif - # Read out modules.order to pass in modpost. # Otherwise, allmodconfig would fail with "Argument list too long". 
ifdef KBUILD_MODULES @@ -124,6 +121,10 @@ modpost-args += -e $(addprefix -i , $(KBUILD_EXTRA_SYMBOLS)) endif # ($(KBUILD_EXTMOD),) +ifneq ($(missing-input),) +modpost-args += -w +endif + quiet_cmd_modpost = MODPOST $@ cmd_modpost = \ $(if $(missing-input), \ diff --git a/sound/xen/xen_snd_front.c b/sound/xen/xen_snd_front.c index 4041748c12e5..b66e037710d0 100644 --- a/sound/xen/xen_snd_front.c +++ b/sound/xen/xen_snd_front.c @@ -311,7 +311,7 @@ static int xen_drv_probe(struct xenbus_device *xb_dev, return xenbus_switch_state(xb_dev, XenbusStateInitialising); } -static int xen_drv_remove(struct xenbus_device *dev) +static void xen_drv_remove(struct xenbus_device *dev) { struct xen_snd_front_info *front_info = dev_get_drvdata(&dev->dev); int to = 100; @@ -345,7 +345,6 @@ static int xen_drv_remove(struct xenbus_device *dev) xen_snd_drv_fini(front_info); xenbus_frontend_closed(dev); - return 0; } static const struct xenbus_device_id xen_drv_ids[] = { diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h index 5fc5b8029bff..7380093ba9e7 100644 --- a/tools/include/nolibc/arch-mips.h +++ b/tools/include/nolibc/arch-mips.h @@ -192,6 +192,7 @@ struct sys_stat_struct { __asm__ (".section .text\n" ".weak __start\n" ".set nomips16\n" + ".set push\n" ".set noreorder\n" ".option pic0\n" ".ent __start\n" @@ -210,6 +211,7 @@ __asm__ (".section .text\n" "li $v0, 4001\n" // NR_exit == 4001 "syscall\n" ".end __start\n" + ".set pop\n" ""); #endif // _NOLIBC_ARCH_MIPS_H diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h index ba04771cb3a3..a3bdd9803f8c 100644 --- a/tools/include/nolibc/arch-riscv.h +++ b/tools/include/nolibc/arch-riscv.h @@ -11,13 +11,13 @@ #define O_RDONLY 0 #define O_WRONLY 1 #define O_RDWR 2 -#define O_CREAT 0x100 -#define O_EXCL 0x200 -#define O_NOCTTY 0x400 -#define O_TRUNC 0x1000 -#define O_APPEND 0x2000 -#define O_NONBLOCK 0x4000 -#define O_DIRECTORY 0x200000 +#define O_CREAT 0x40 +#define O_EXCL 0x80 
+#define O_NOCTTY 0x100 +#define O_TRUNC 0x200 +#define O_APPEND 0x400 +#define O_NONBLOCK 0x800 +#define O_DIRECTORY 0x10000 struct sys_stat_struct { unsigned long st_dev; /* Device. */ diff --git a/tools/include/nolibc/ctype.h b/tools/include/nolibc/ctype.h index e3000b2992d7..6f90706d0644 100644 --- a/tools/include/nolibc/ctype.h +++ b/tools/include/nolibc/ctype.h @@ -96,4 +96,7 @@ int ispunct(int c) return isgraph(c) && !isalnum(c); } +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_CTYPE_H */ diff --git a/tools/include/nolibc/errno.h b/tools/include/nolibc/errno.h index 06893d6dfb7a..9dc4919c769b 100644 --- a/tools/include/nolibc/errno.h +++ b/tools/include/nolibc/errno.h @@ -24,4 +24,7 @@ static int errno; */ #define MAX_ERRNO 4095 +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_ERRNO_H */ diff --git a/tools/include/nolibc/signal.h b/tools/include/nolibc/signal.h index ef47e71e2be3..137552216e46 100644 --- a/tools/include/nolibc/signal.h +++ b/tools/include/nolibc/signal.h @@ -19,4 +19,7 @@ int raise(int signal) return sys_kill(sys_getpid(), signal); } +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_SIGNAL_H */ diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h index a3cebc4bc3ac..96ac8afc5aee 100644 --- a/tools/include/nolibc/stdio.h +++ b/tools/include/nolibc/stdio.h @@ -303,4 +303,7 @@ void perror(const char *msg) fprintf(stderr, "%s%serrno=%d\n", (msg && *msg) ? msg : "", (msg && *msg) ? 
": " : "", errno); } +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_STDIO_H */ diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h index 92378c4b9660..a24000d1e822 100644 --- a/tools/include/nolibc/stdlib.h +++ b/tools/include/nolibc/stdlib.h @@ -419,4 +419,7 @@ char *u64toa(uint64_t in) return itoa_buffer; } +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_STDLIB_H */ diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h index ad97c0d522b8..fffdaf6ff467 100644 --- a/tools/include/nolibc/string.h +++ b/tools/include/nolibc/string.h @@ -88,8 +88,11 @@ void *memset(void *dst, int b, size_t len) { char *p = dst; - while (len--) + while (len--) { + /* prevent gcc from recognizing memset() here */ + asm volatile(""); *(p++) = b; + } return dst; } @@ -285,4 +288,7 @@ char *strrchr(const char *s, int c) return (char *)ret; } +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_STRING_H */ diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index ce3ee03aa679..78473d34e27c 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -1243,5 +1243,7 @@ ssize_t write(int fd, const void *buf, size_t count) return ret; } +/* make sure to include all global symbols */ +#include "nolibc.h" #endif /* _NOLIBC_SYS_H */ diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h index d18b7661fdd7..84655361b9ad 100644 --- a/tools/include/nolibc/time.h +++ b/tools/include/nolibc/time.h @@ -25,4 +25,7 @@ time_t time(time_t *tptr) return tv.tv_sec; } +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_TIME_H */ diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h index 959997034e55..fbbc0e68c001 100644 --- a/tools/include/nolibc/types.h +++ b/tools/include/nolibc/types.h @@ -26,13 +26,13 @@ #define S_IFSOCK 0140000 #define S_IFMT 
0170000 -#define S_ISDIR(mode) (((mode) & S_IFDIR) == S_IFDIR) -#define S_ISCHR(mode) (((mode) & S_IFCHR) == S_IFCHR) -#define S_ISBLK(mode) (((mode) & S_IFBLK) == S_IFBLK) -#define S_ISREG(mode) (((mode) & S_IFREG) == S_IFREG) -#define S_ISFIFO(mode) (((mode) & S_IFIFO) == S_IFIFO) -#define S_ISLNK(mode) (((mode) & S_IFLNK) == S_IFLNK) -#define S_ISSOCK(mode) (((mode) & S_IFSOCK) == S_IFSOCK) +#define S_ISDIR(mode) (((mode) & S_IFMT) == S_IFDIR) +#define S_ISCHR(mode) (((mode) & S_IFMT) == S_IFCHR) +#define S_ISBLK(mode) (((mode) & S_IFMT) == S_IFBLK) +#define S_ISREG(mode) (((mode) & S_IFMT) == S_IFREG) +#define S_ISFIFO(mode) (((mode) & S_IFMT) == S_IFIFO) +#define S_ISLNK(mode) (((mode) & S_IFMT) == S_IFLNK) +#define S_ISSOCK(mode) (((mode) & S_IFMT) == S_IFSOCK) /* dirent types */ #define DT_UNKNOWN 0x0 @@ -89,39 +89,46 @@ #define EXIT_SUCCESS 0 #define EXIT_FAILURE 1 +#define FD_SETIDXMASK (8 * sizeof(unsigned long)) +#define FD_SETBITMASK (8 * sizeof(unsigned long)-1) + /* for select() */ typedef struct { - uint32_t fd32[(FD_SETSIZE + 31) / 32]; + unsigned long fds[(FD_SETSIZE + FD_SETBITMASK) / FD_SETIDXMASK]; } fd_set; -#define FD_CLR(fd, set) do { \ - fd_set *__set = (set); \ - int __fd = (fd); \ - if (__fd >= 0) \ - __set->fd32[__fd / 32] &= ~(1U << (__fd & 31)); \ +#define FD_CLR(fd, set) do { \ + fd_set *__set = (set); \ + int __fd = (fd); \ + if (__fd >= 0) \ + __set->fds[__fd / FD_SETIDXMASK] &= \ + ~(1U << (__fd & FX_SETBITMASK)); \ } while (0) -#define FD_SET(fd, set) do { \ - fd_set *__set = (set); \ - int __fd = (fd); \ - if (__fd >= 0) \ - __set->fd32[__fd / 32] |= 1U << (__fd & 31); \ +#define FD_SET(fd, set) do { \ + fd_set *__set = (set); \ + int __fd = (fd); \ + if (__fd >= 0) \ + __set->fds[__fd / FD_SETIDXMASK] |= \ + 1 << (__fd & FD_SETBITMASK); \ } while (0) -#define FD_ISSET(fd, set) ({ \ - fd_set *__set = (set); \ - int __fd = (fd); \ - int __r = 0; \ - if (__fd >= 0) \ - __r = !!(__set->fd32[__fd / 32] & 1U << (__fd & 31)); \ - __r; \ 
+#define FD_ISSET(fd, set) ({ \ + fd_set *__set = (set); \ + int __fd = (fd); \ + int __r = 0; \ + if (__fd >= 0) \ + __r = !!(__set->fds[__fd / FD_SETIDXMASK] & \ +1U << (__fd & FD_SET_BITMASK)); \ + __r; \ }) -#define FD_ZERO(set) do { \ - fd_set *__set = (set); \ - int __idx; \ - for (__idx = 0; __idx < (FD_SETSIZE+31) / 32; __idx ++) \ - __set->fd32[__idx] = 0; \ +#define FD_ZERO(set) do { \ + fd_set *__set = (set); \ + int __idx; \ + int __size = (FD_SETSIZE+FD_SETBITMASK) / FD_SETIDXMASK;\ + for (__idx = 0; __idx < __size; __idx++) \ + __set->fds[__idx] = 0; \ } while (0) /* for poll() */ @@ -202,4 +209,7 @@ struct stat { }) #endif +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_TYPES_H */ diff --git a/tools/include/nolibc/unistd.h b/tools/include/nolibc/unistd.h index 1c25e20ee360..1cfcd52106a4 100644 --- a/tools/include/nolibc/unistd.h +++ b/tools/include/nolibc/unistd.h @@ -51,4 +51,7 @@ int tcsetpgrp(int fd, pid_t pid) return ioctl(fd, TIOCSPGRP, &pid); } +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_UNISTD_H */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 4350be739f4f..4b7c8b33069e 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -427,6 +427,15 @@ static int decode_instructions(struct objtool_file *file) if (func->type != STT_NOTYPE && func->type != STT_FUNC) continue; + if (func->offset == sec->sh.sh_size) { + /* Heuristic: likely an "end" symbol */ + if (func->type == STT_NOTYPE) + continue; + WARN("%s(): STT_FUNC at end of section", + func->name); + return -1; + } + if (func->return_thunk || func->alias != func) continue; diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index 6e7b88917ca0..ba5d942e4c6a 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -267,7 +267,7 @@ $(OUTPUT)%.xml : %.txt $(ASCIIDOC) -b docbook -d manpage \ $(ASCIIDOC_EXTRA) 
-aperf_version=$(PERF_VERSION) \ -aperf_date=$(shell git log -1 --pretty="format:%cd" \ - --date=short $<) \ + --date=short --no-show-signature $<) \ -o $@+ $< && \ mv $@+ $@ diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index c2504c39bdcb..5b8784675903 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -589,6 +589,8 @@ ifndef NO_LIBELF $(call feature_check,libbpf-bpf_program__set_insns) ifeq ($(feature-libbpf-bpf_program__set_insns), 1) CFLAGS += -DHAVE_LIBBPF_BPF_PROGRAM__SET_INSNS + else + dummy := $(error Error: libbpf devel library needs to be >= 0.8.0 to build with LIBBPF_DYNAMIC, update or build statically with the version that comes with the kernel sources); endif $(call feature_check,libbpf-btf__raw_data) ifeq ($(feature-libbpf-btf__raw_data), 1) @@ -602,6 +604,8 @@ ifndef NO_LIBELF dummy := $(error Error: No libbpf devel library found, please install libbpf-devel); endif else + # Libbpf will be built as a static library from tools/lib/bpf. + LIBBPF_STATIC := 1 CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID CFLAGS += -DHAVE_LIBBPF_BPF_PROG_LOAD CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_PROGRAM @@ -1314,14 +1318,6 @@ tip_instdir_SQ = $(subst ','\'',$(tip_instdir)) export perfexec_instdir_SQ -# If we install to $(HOME) we keep the traceevent default: -# $(HOME)/.traceevent/plugins -# Otherwise we install plugins into the global $(libdir). 
-ifdef DESTDIR -plugindir=$(libdir)/traceevent/plugins -plugindir_SQ= $(subst ','\'',$(plugindir)) -endif - print_var = $(eval $(print_var_code)) $(info $(MSG)) define print_var_code MSG = $(shell printf '...%40s: %s' $(1) $($(1))) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 13e7d26e77f0..b7d9c4206230 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -303,10 +303,12 @@ ifneq ($(OUTPUT),) else LIBBPF_OUTPUT = $(CURDIR)/libbpf endif -LIBBPF_DESTDIR = $(LIBBPF_OUTPUT) -LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include -LIBBPF = $(LIBBPF_OUTPUT)/libbpf.a -CFLAGS += -I$(LIBBPF_OUTPUT)/include +ifdef LIBBPF_STATIC + LIBBPF_DESTDIR = $(LIBBPF_OUTPUT) + LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include + LIBBPF = $(LIBBPF_OUTPUT)/libbpf.a + CFLAGS += -I$(LIBBPF_OUTPUT)/include +endif ifneq ($(OUTPUT),) LIBSUBCMD_OUTPUT = $(abspath $(OUTPUT))/libsubcmd @@ -393,10 +395,8 @@ endif export PERL_PATH PERFLIBS = $(LIBAPI) $(LIBPERF) $(LIBSUBCMD) $(LIBSYMBOL) -ifndef NO_LIBBPF - ifndef LIBBPF_DYNAMIC - PERFLIBS += $(LIBBPF) - endif +ifdef LIBBPF_STATIC + PERFLIBS += $(LIBBPF) endif # We choose to avoid "if .. else if .. else .. 
endif endif" @@ -756,12 +756,15 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc $(arch_errno_name_array) \ $(sync_file_range_arrays) \ $(LIBAPI) \ - $(LIBBPF) \ $(LIBPERF) \ $(LIBSUBCMD) \ $(LIBSYMBOL) \ bpf-skel +ifdef LIBBPF_STATIC +prepare: $(LIBBPF) +endif + $(OUTPUT)%.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ @@ -819,7 +822,7 @@ $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) $(LIBAPI): FORCE | $(LIBAPI_OUTPUT) $(Q)$(MAKE) -C $(LIBAPI_DIR) O=$(LIBAPI_OUTPUT) \ - DESTDIR=$(LIBAPI_DESTDIR) prefix= \ + DESTDIR=$(LIBAPI_DESTDIR) prefix= subdir= \ $@ install_headers $(LIBAPI)-clean: @@ -828,7 +831,7 @@ $(LIBAPI)-clean: $(LIBBPF): FORCE | $(LIBBPF_OUTPUT) $(Q)$(MAKE) -C $(LIBBPF_DIR) FEATURES_DUMP=$(FEATURE_DUMP_EXPORT) \ - O= OUTPUT=$(LIBBPF_OUTPUT)/ DESTDIR=$(LIBBPF_DESTDIR) prefix= \ + O= OUTPUT=$(LIBBPF_OUTPUT)/ DESTDIR=$(LIBBPF_DESTDIR) prefix= subdir= \ $@ install_headers $(LIBBPF)-clean: @@ -837,7 +840,7 @@ $(LIBBPF)-clean: $(LIBPERF): FORCE | $(LIBPERF_OUTPUT) $(Q)$(MAKE) -C $(LIBPERF_DIR) O=$(LIBPERF_OUTPUT) \ - DESTDIR=$(LIBPERF_DESTDIR) prefix= \ + DESTDIR=$(LIBPERF_DESTDIR) prefix= subdir= \ $@ install_headers $(LIBPERF)-clean: @@ -846,7 +849,7 @@ $(LIBPERF)-clean: $(LIBSUBCMD): FORCE | $(LIBSUBCMD_OUTPUT) $(Q)$(MAKE) -C $(LIBSUBCMD_DIR) O=$(LIBSUBCMD_OUTPUT) \ - DESTDIR=$(LIBSUBCMD_DESTDIR) prefix= \ + DESTDIR=$(LIBSUBCMD_DESTDIR) prefix= subdir= \ $@ install_headers $(LIBSUBCMD)-clean: @@ -855,7 +858,7 @@ $(LIBSUBCMD)-clean: $(LIBSYMBOL): FORCE | $(LIBSYMBOL_OUTPUT) $(Q)$(MAKE) -C $(LIBSYMBOL_DIR) O=$(LIBSYMBOL_OUTPUT) \ - DESTDIR=$(LIBSYMBOL_DESTDIR) prefix= \ + DESTDIR=$(LIBSYMBOL_DESTDIR) prefix= subdir= \ $@ install_headers $(LIBSYMBOL)-clean: diff --git a/tools/perf/arch/riscv/util/unwind-libdw.c b/tools/perf/arch/riscv/util/unwind-libdw.c index 19536e172850..54a198714eb8 100644 --- a/tools/perf/arch/riscv/util/unwind-libdw.c +++ 
b/tools/perf/arch/riscv/util/unwind-libdw.c @@ -4,7 +4,7 @@ #include <elfutils/libdwfl.h> #include "../../util/unwind-libdw.h" #include "../../util/perf_regs.h" -#include "../../util/event.h" +#include "../../util/sample.h" bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) { diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index e20656c431a4..8ae0a1535293 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -26,6 +26,7 @@ #include "util/string2.h" #include <linux/kernel.h> +#include <linux/numa.h> #include <linux/rbtree.h> #include <linux/string.h> #include <linux/zalloc.h> @@ -185,22 +186,33 @@ static int evsel__process_alloc_event(struct evsel *evsel, struct perf_sample *s total_allocated += bytes_alloc; nr_allocs++; - return 0; -} -static int evsel__process_alloc_node_event(struct evsel *evsel, struct perf_sample *sample) -{ - int ret = evsel__process_alloc_event(evsel, sample); + /* + * Commit 11e9734bcb6a ("mm/slab_common: unify NUMA and UMA + * version of tracepoints") adds the field "node" into the + * tracepoints 'kmalloc' and 'kmem_cache_alloc'. + * + * The legacy tracepoints 'kmalloc_node' and 'kmem_cache_alloc_node' + * also contain the field "node". + * + * If the tracepoint contains the field "node" the tool stats the + * cross allocation. + */ + if (evsel__field(evsel, "node")) { + int node1, node2; - if (!ret) { - int node1 = cpu__get_node((struct perf_cpu){.cpu = sample->cpu}), - node2 = evsel__intval(evsel, sample, "node"); + node1 = cpu__get_node((struct perf_cpu){.cpu = sample->cpu}); + node2 = evsel__intval(evsel, sample, "node"); - if (node1 != node2) + /* + * If the field "node" is NUMA_NO_NODE (-1), we don't take it + * as a cross allocation. 
+ */ + if ((node2 != NUMA_NO_NODE) && (node1 != node2)) nr_cross_allocs++; } - return ret; + return 0; } static int ptr_cmp(void *, void *); @@ -1369,8 +1381,8 @@ static int __cmd_kmem(struct perf_session *session) /* slab allocator */ { "kmem:kmalloc", evsel__process_alloc_event, }, { "kmem:kmem_cache_alloc", evsel__process_alloc_event, }, - { "kmem:kmalloc_node", evsel__process_alloc_node_event, }, - { "kmem:kmem_cache_alloc_node", evsel__process_alloc_node_event, }, + { "kmem:kmalloc_node", evsel__process_alloc_event, }, + { "kmem:kmem_cache_alloc_node", evsel__process_alloc_event, }, { "kmem:kfree", evsel__process_free_event, }, { "kmem:kmem_cache_free", evsel__process_free_event, }, /* page allocator */ @@ -1824,6 +1836,19 @@ static int parse_line_opt(const struct option *opt __maybe_unused, return 0; } +static bool slab_legacy_tp_is_exposed(void) +{ + /* + * The tracepoints "kmem:kmalloc_node" and + * "kmem:kmem_cache_alloc_node" have been removed on the latest + * kernel, if the tracepoint "kmem:kmalloc_node" is existed it + * means the tool is running on an old kernel, we need to + * rollback to support these legacy tracepoints. + */ + return IS_ERR(trace_event__tp_format("kmem", "kmalloc_node")) ? 
+ false : true; +} + static int __cmd_record(int argc, const char **argv) { const char * const record_args[] = { @@ -1831,22 +1856,28 @@ static int __cmd_record(int argc, const char **argv) }; const char * const slab_events[] = { "-e", "kmem:kmalloc", - "-e", "kmem:kmalloc_node", "-e", "kmem:kfree", "-e", "kmem:kmem_cache_alloc", - "-e", "kmem:kmem_cache_alloc_node", "-e", "kmem:kmem_cache_free", }; + const char * const slab_legacy_events[] = { + "-e", "kmem:kmalloc_node", + "-e", "kmem:kmem_cache_alloc_node", + }; const char * const page_events[] = { "-e", "kmem:mm_page_alloc", "-e", "kmem:mm_page_free", }; unsigned int rec_argc, i, j; const char **rec_argv; + unsigned int slab_legacy_tp_exposed = slab_legacy_tp_is_exposed(); rec_argc = ARRAY_SIZE(record_args) + argc - 1; - if (kmem_slab) + if (kmem_slab) { rec_argc += ARRAY_SIZE(slab_events); + if (slab_legacy_tp_exposed) + rec_argc += ARRAY_SIZE(slab_legacy_events); + } if (kmem_page) rec_argc += ARRAY_SIZE(page_events) + 1; /* for -g */ @@ -1861,6 +1892,10 @@ static int __cmd_record(int argc, const char **argv) if (kmem_slab) { for (j = 0; j < ARRAY_SIZE(slab_events); j++, i++) rec_argv[i] = strdup(slab_events[j]); + if (slab_legacy_tp_exposed) { + for (j = 0; j < ARRAY_SIZE(slab_legacy_events); j++, i++) + rec_argv[i] = strdup(slab_legacy_events[j]); + } } if (kmem_page) { rec_argv[i++] = strdup("-g"); diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 718b82bfcdff..506c2fe42d52 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -1670,6 +1670,7 @@ static int __cmd_report(bool display_info) /* for lock function check */ symbol_conf.sort_by_name = true; + symbol_conf.allow_aliases = true; symbol__init(&session->header.env); if (!data.is_pipe) { @@ -1757,6 +1758,7 @@ static int __cmd_contention(int argc, const char **argv) /* for lock function check */ symbol_conf.sort_by_name = true; + symbol_conf.allow_aliases = true; symbol__init(&session->header.env); if (use_bpf) 
{ diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 86e06f136f40..d21fe0f32a6d 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -16,7 +16,9 @@ #include "util/record.h" #include <api/fs/tracing_path.h> +#ifdef HAVE_LIBBPF_SUPPORT #include <bpf/bpf.h> +#endif #include "util/bpf_map.h" #include "util/rlimit.h" #include "builtin.h" diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index 8fcab5ad00c5..e8d2762adade 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -16,20 +16,20 @@ perf-ftrace mainporcelain common perf-inject mainporcelain common perf-iostat mainporcelain common perf-kallsyms mainporcelain common -perf-kmem mainporcelain common +perf-kmem mainporcelain traceevent perf-kvm mainporcelain common -perf-kwork mainporcelain common +perf-kwork mainporcelain traceevent perf-list mainporcelain common -perf-lock mainporcelain common +perf-lock mainporcelain traceevent perf-mem mainporcelain common perf-probe mainporcelain full perf-record mainporcelain common perf-report mainporcelain common -perf-sched mainporcelain common +perf-sched mainporcelain traceevent perf-script mainporcelain common perf-stat mainporcelain common perf-test mainporcelain common -perf-timechart mainporcelain common +perf-timechart mainporcelain traceevent perf-top mainporcelain common perf-trace mainporcelain audit perf-version mainporcelain common diff --git a/tools/perf/tests/bpf-script-test-prologue.c b/tools/perf/tests/bpf-script-test-prologue.c index bd83d364cf30..91778b5c6125 100644 --- a/tools/perf/tests/bpf-script-test-prologue.c +++ b/tools/perf/tests/bpf-script-test-prologue.c @@ -20,6 +20,8 @@ # undef if #endif +typedef unsigned int __bitwise fmode_t; + #define FMODE_READ 0x1 #define FMODE_WRITE 0x2 diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 05e818a8bbad..009d6efb673c 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -222,19 +222,7 @@ 
installed_files_bin := bin/perf installed_files_bin += etc/bash_completion.d/perf installed_files_bin += libexec/perf-core/perf-archive -installed_files_plugins := $(lib)/traceevent/plugins/plugin_cfg80211.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_scsi.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_xen.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_function.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_sched_switch.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_mac80211.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_kvm.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_kmem.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_hrtimer.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_jbd2.so - installed_files_all := $(installed_files_bin) -installed_files_all += $(installed_files_plugins) test_make_install := $(call test_dest_files,$(installed_files_all)) test_make_install_O := $(call test_dest_files,$(installed_files_all)) diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh index 34c400ccbe04..57e7a6a470c9 100755 --- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh @@ -37,6 +37,7 @@ trace_libc_inet_pton_backtrace() { case "$(uname -m)" in s390x) eventattr='call-graph=dwarf,max-stack=4' + echo "text_to_binary_address.*\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected echo "gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected echo "(__GI_)?getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected echo "main\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$" >> $expected @@ -57,7 +58,7 @@ trace_libc_inet_pton_backtrace() { perf_data=`mktemp -u /tmp/perf.data.XXX` perf_script=`mktemp -u /tmp/perf.script.XXX` perf record -e $event_name/$eventattr/ -o 
$perf_data ping -6 -c 1 ::1 > /dev/null 2>&1 - perf script -i $perf_data > $perf_script + perf script -i $perf_data | tac | grep -m1 ^ping -B9 | tac > $perf_script exec 3<$perf_script exec 4<$expected diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN index 3cc42821d9b3..d7dc7c28508c 100755 --- a/tools/perf/util/PERF-VERSION-GEN +++ b/tools/perf/util/PERF-VERSION-GEN @@ -19,7 +19,7 @@ TAG= if test -d ../../.git -o -f ../../.git then TAG=$(MAKEFLAGS= make -sC ../.. kernelversion) - CID=$(git log -1 --abbrev=12 --pretty=format:"%h" 2>/dev/null) && CID="-g$CID" + CID=$(git log -1 --abbrev=12 --pretty=format:"%h" --no-show-signature 2>/dev/null) && CID="-g$CID" elif test -f ../../PERF-VERSION-FILE then TAG=$(cut -d' ' -f3 ../../PERF-VERSION-FILE | sed -e 's/\"//g') diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 265d20cc126b..c2e323cd7d49 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -2611,7 +2611,7 @@ static int find_dso_sym(struct dso *dso, const char *sym_name, u64 *start, *size = sym->start - *start; if (idx > 0) { if (*size) - return 1; + return 0; } else if (dso_sym_match(sym, sym_name, &cnt, idx)) { print_duplicate_syms(dso, sym_name); return -EINVAL; diff --git a/tools/perf/util/bpf_counter.h b/tools/perf/util/bpf_counter.h index 4dbf26408b69..c6d21c07b14c 100644 --- a/tools/perf/util/bpf_counter.h +++ b/tools/perf/util/bpf_counter.h @@ -4,9 +4,12 @@ #include <linux/list.h> #include <sys/resource.h> + +#ifdef HAVE_LIBBPF_SUPPORT #include <bpf/bpf.h> #include <bpf/btf.h> #include <bpf/libbpf.h> +#endif struct evsel; struct target; @@ -87,6 +90,8 @@ static inline void set_max_rlimit(void) setrlimit(RLIMIT_MEMLOCK, &rinf); } +#ifdef HAVE_BPF_SKEL + static inline __u32 bpf_link_get_id(int fd) { struct bpf_link_info link_info = { .id = 0, }; @@ -127,5 +132,6 @@ static inline int bperf_trigger_reading(int prog_fd, int cpu) return bpf_prog_test_run_opts(prog_fd, &opts); } +#endif /* 
HAVE_BPF_SKEL */ #endif /* __PERF_BPF_COUNTER_H */ diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c index 3c2df7522f6f..1c82377ed78b 100644 --- a/tools/perf/util/bpf_counter_cgroup.c +++ b/tools/perf/util/bpf_counter_cgroup.c @@ -116,27 +116,19 @@ static int bperf_load_program(struct evlist *evlist) /* open single copy of the events w/o cgroup */ err = evsel__open_per_cpu(evsel, evsel->core.cpus, -1); - if (err) { - pr_err("Failed to open first cgroup events\n"); - goto out; - } + if (err == 0) + evsel->supported = true; map_fd = bpf_map__fd(skel->maps.events); perf_cpu_map__for_each_cpu(cpu, j, evsel->core.cpus) { int fd = FD(evsel, j); __u32 idx = evsel->core.idx * total_cpus + cpu.cpu; - err = bpf_map_update_elem(map_fd, &idx, &fd, - BPF_ANY); - if (err < 0) { - pr_err("Failed to update perf_event fd\n"); - goto out; - } + bpf_map_update_elem(map_fd, &idx, &fd, BPF_ANY); } evsel->cgrp = leader_cgrp; } - evsel->supported = true; if (evsel->cgrp == cgrp) continue; diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index e99b41f9be45..cd978c240e0d 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -224,6 +224,19 @@ static int add_cgroup_name(const char *fpath, const struct stat *sb __maybe_unus return 0; } +static int check_and_add_cgroup_name(const char *fpath) +{ + struct cgroup_name *cn; + + list_for_each_entry(cn, &cgroup_list, list) { + if (!strcmp(cn->name, fpath)) + return 0; + } + + /* pretend if it's added by ftw() */ + return add_cgroup_name(fpath, NULL, FTW_D, NULL); +} + static void release_cgroup_list(void) { struct cgroup_name *cn; @@ -242,7 +255,7 @@ static int list_cgroups(const char *str) struct cgroup_name *cn; char *s; - /* use given name as is - for testing purpose */ + /* use given name as is when no regex is given */ for (;;) { p = strchr(str, ','); e = p ? 
p : eos; @@ -253,13 +266,13 @@ static int list_cgroups(const char *str) s = strndup(str, e - str); if (!s) return -1; - /* pretend if it's added by ftw() */ - ret = add_cgroup_name(s, NULL, FTW_D, NULL); + + ret = check_and_add_cgroup_name(s); free(s); - if (ret) + if (ret < 0) return -1; } else { - if (add_cgroup_name("", NULL, FTW_D, NULL) < 0) + if (check_and_add_cgroup_name("/") < 0) return -1; } diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index a7f68c309545..fc16299c915f 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -132,6 +132,7 @@ int perf_data__open_dir(struct perf_data *data) file->size = st.st_size; } + closedir(dir); if (!files) return -EINVAL; @@ -140,6 +141,7 @@ int perf_data__open_dir(struct perf_data *data) return 0; out_err: + closedir(dir); close_dir(files, nr); return ret; } diff --git a/tools/perf/util/generate-cmdlist.sh b/tools/perf/util/generate-cmdlist.sh index c3cef36d4176..1b5140e5ce99 100755 --- a/tools/perf/util/generate-cmdlist.sh +++ b/tools/perf/util/generate-cmdlist.sh @@ -38,7 +38,7 @@ do done echo "#endif /* HAVE_LIBELF_SUPPORT */" -echo "#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT)" +echo "#if defined(HAVE_LIBTRACEEVENT) && (defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT))" sed -n -e 's/^perf-\([^ ]*\)[ ].* audit*/\1/p' command-list.txt | sort | while read cmd @@ -51,5 +51,20 @@ do p }' "Documentation/perf-$cmd.txt" done -echo "#endif /* HAVE_LIBELF_SUPPORT */" +echo "#endif /* HAVE_LIBTRACEEVENT && (HAVE_LIBAUDIT_SUPPORT || HAVE_SYSCALL_TABLE_SUPPORT) */" + +echo "#ifdef HAVE_LIBTRACEEVENT" +sed -n -e 's/^perf-\([^ ]*\)[ ].* traceevent.*/\1/p' command-list.txt | +sort | +while read cmd +do + sed -n ' + /^NAME/,/perf-'"$cmd"'/H + ${ + x + s/.*perf-'"$cmd"' - \(.*\)/ {"'"$cmd"'", "\1"},/ + p + }' "Documentation/perf-$cmd.txt" +done +echo "#endif /* HAVE_LIBTRACEEVENT */" echo "};" diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c 
index e188f74698dd..37662cdec5ee 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2971,6 +2971,18 @@ static int add_dynamic_entry(struct evlist *evlist, const char *tok, ret = add_all_matching_fields(evlist, field_name, raw_trace, level); goto out; } +#else + evlist__for_each_entry(evlist, evsel) { + if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT) { + pr_err("%s %s", ret ? "," : "This perf binary isn't linked with libtraceevent, can't process", evsel__name(evsel)); + ret = -ENOTSUP; + } + } + + if (ret) { + pr_err("\n"); + goto out; + } #endif evsel = find_evsel(evlist, event_name); diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index add6c5d9531c..9b3cd79cca12 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -4,6 +4,7 @@ #include <stdbool.h> #include <stdio.h> +#include <sys/types.h> #include <linux/types.h> struct evlist; diff --git a/tools/testing/memblock/.gitignore b/tools/testing/memblock/.gitignore index 654338e0be52..4cc7cd5aac2b 100644 --- a/tools/testing/memblock/.gitignore +++ b/tools/testing/memblock/.gitignore @@ -1,4 +1,5 @@ main memblock.c linux/memblock.h +asm/asm.h asm/cmpxchg.h diff --git a/tools/testing/memblock/Makefile b/tools/testing/memblock/Makefile index 2310ac4d080e..7a1ca694a982 100644 --- a/tools/testing/memblock/Makefile +++ b/tools/testing/memblock/Makefile @@ -29,13 +29,14 @@ include: ../../../include/linux/memblock.h ../../include/linux/*.h \ @mkdir -p linux test -L linux/memblock.h || ln -s ../../../../include/linux/memblock.h linux/memblock.h + test -L asm/asm.h || ln -s ../../../arch/x86/include/asm/asm.h asm/asm.h test -L asm/cmpxchg.h || ln -s ../../../arch/x86/include/asm/cmpxchg.h asm/cmpxchg.h memblock.c: $(EXTR_SRC) test -L memblock.c || ln -s $(EXTR_SRC) memblock.c clean: - $(RM) $(TARGETS) $(OFILES) linux/memblock.h memblock.c asm/cmpxchg.h + $(RM) $(TARGETS) $(OFILES) linux/memblock.h memblock.c asm/asm.h asm/cmpxchg.h help: @echo 
'Memblock simulator' diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c index b57e91e1c3f2..532459a15067 100644 --- a/tools/testing/selftests/net/af_unix/test_unix_oob.c +++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c @@ -124,7 +124,7 @@ void producer(struct sockaddr_un *consumer_addr) wait_for_signal(pipefd[0]); if (connect(cfd, (struct sockaddr *)consumer_addr, - sizeof(struct sockaddr)) != 0) { + sizeof(*consumer_addr)) != 0) { perror("Connect failed"); kill(0, SIGTERM); exit(1); diff --git a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh index dca1e6f777a8..f11756e7df2f 100755 --- a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh +++ b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh @@ -12,19 +12,27 @@ # In addition this script also checks if forcing a specific field in the # outer header is working. +# Return 4 by default (Kselftest SKIP code) +ERR=4 + if [ "$(id -u)" != "0" ]; then echo "Please run as root." - exit 0 + exit $ERR fi if ! which tcpdump > /dev/null 2>&1; then echo "No tcpdump found. Required for this test." - exit 0 + exit $ERR fi expected_tos="0x00" expected_ttl="0" failed=false +readonly NS0=$(mktemp -u ns0-XXXXXXXX) +readonly NS1=$(mktemp -u ns1-XXXXXXXX) + +RUN_NS0="ip netns exec ${NS0}" + get_random_tos() { # Get a random hex tos value between 0x00 and 0xfc, a multiple of 4 echo "0x$(tr -dc '0-9a-f' < /dev/urandom | head -c 1)\ @@ -61,7 +69,6 @@ setup() { local vlan="$5" local test_tos="0x00" local test_ttl="0" - local ns="ip netns exec testing" # We don't want a test-tos of 0x00, # because this is the value that we get when no tos is set. 
@@ -94,14 +101,15 @@ setup() { printf "│%7s │%6s │%6s │%13s │%13s │%6s │" \ "$type" "$outer" "$inner" "$tos" "$ttl" "$vlan" - # Create 'testing' netns, veth pair and connect main ns with testing ns - ip netns add testing - ip link add type veth - ip link set veth1 netns testing - ip link set veth0 up - $ns ip link set veth1 up - ip addr flush dev veth0 - $ns ip addr flush dev veth1 + # Create netns NS0 and NS1 and connect them with a veth pair + ip netns add "${NS0}" + ip netns add "${NS1}" + ip link add name veth0 netns "${NS0}" type veth \ + peer name veth1 netns "${NS1}" + ip -netns "${NS0}" link set dev veth0 up + ip -netns "${NS1}" link set dev veth1 up + ip -netns "${NS0}" address flush dev veth0 + ip -netns "${NS1}" address flush dev veth1 local local_addr1="" local local_addr2="" @@ -127,51 +135,59 @@ setup() { if [ "$type" = "gre" ]; then type="gretap" fi - ip addr add 198.18.0.1/24 dev veth0 - $ns ip addr add 198.18.0.2/24 dev veth1 - ip link add name tep0 type $type $local_addr1 remote \ - 198.18.0.2 tos $test_tos ttl $test_ttl $vxlan $geneve - $ns ip link add name tep1 type $type $local_addr2 remote \ - 198.18.0.1 tos $test_tos ttl $test_ttl $vxlan $geneve + ip -netns "${NS0}" address add 198.18.0.1/24 dev veth0 + ip -netns "${NS1}" address add 198.18.0.2/24 dev veth1 + ip -netns "${NS0}" link add name tep0 type $type $local_addr1 \ + remote 198.18.0.2 tos $test_tos ttl $test_ttl \ + $vxlan $geneve + ip -netns "${NS1}" link add name tep1 type $type $local_addr2 \ + remote 198.18.0.1 tos $test_tos ttl $test_ttl \ + $vxlan $geneve elif [ "$outer" = "6" ]; then if [ "$type" = "gre" ]; then type="ip6gretap" fi - ip addr add fdd1:ced0:5d88:3fce::1/64 dev veth0 - $ns ip addr add fdd1:ced0:5d88:3fce::2/64 dev veth1 - ip link add name tep0 type $type $local_addr1 \ - remote fdd1:ced0:5d88:3fce::2 tos $test_tos ttl $test_ttl \ - $vxlan $geneve - $ns ip link add name tep1 type $type $local_addr2 \ - remote fdd1:ced0:5d88:3fce::1 tos $test_tos ttl $test_ttl \ - 
$vxlan $geneve + ip -netns "${NS0}" address add fdd1:ced0:5d88:3fce::1/64 \ + dev veth0 nodad + ip -netns "${NS1}" address add fdd1:ced0:5d88:3fce::2/64 \ + dev veth1 nodad + ip -netns "${NS0}" link add name tep0 type $type $local_addr1 \ + remote fdd1:ced0:5d88:3fce::2 tos $test_tos \ + ttl $test_ttl $vxlan $geneve + ip -netns "${NS1}" link add name tep1 type $type $local_addr2 \ + remote fdd1:ced0:5d88:3fce::1 tos $test_tos \ + ttl $test_ttl $vxlan $geneve fi # Bring L2-tunnel link up and create VLAN on top - ip link set tep0 up - $ns ip link set tep1 up - ip addr flush dev tep0 - $ns ip addr flush dev tep1 + ip -netns "${NS0}" link set tep0 up + ip -netns "${NS1}" link set tep1 up + ip -netns "${NS0}" address flush dev tep0 + ip -netns "${NS1}" address flush dev tep1 local parent if $vlan; then parent="vlan99-" - ip link add link tep0 name ${parent}0 type vlan id 99 - $ns ip link add link tep1 name ${parent}1 type vlan id 99 - ip link set ${parent}0 up - $ns ip link set ${parent}1 up - ip addr flush dev ${parent}0 - $ns ip addr flush dev ${parent}1 + ip -netns "${NS0}" link add link tep0 name ${parent}0 \ + type vlan id 99 + ip -netns "${NS1}" link add link tep1 name ${parent}1 \ + type vlan id 99 + ip -netns "${NS0}" link set dev ${parent}0 up + ip -netns "${NS1}" link set dev ${parent}1 up + ip -netns "${NS0}" address flush dev ${parent}0 + ip -netns "${NS1}" address flush dev ${parent}1 else parent="tep" fi # Assign inner IPv4/IPv6 addresses if [ "$inner" = "4" ] || [ "$inner" = "other" ]; then - ip addr add 198.19.0.1/24 brd + dev ${parent}0 - $ns ip addr add 198.19.0.2/24 brd + dev ${parent}1 + ip -netns "${NS0}" address add 198.19.0.1/24 brd + dev ${parent}0 + ip -netns "${NS1}" address add 198.19.0.2/24 brd + dev ${parent}1 elif [ "$inner" = "6" ]; then - ip addr add fdd4:96cf:4eae:443b::1/64 dev ${parent}0 - $ns ip addr add fdd4:96cf:4eae:443b::2/64 dev ${parent}1 + ip -netns "${NS0}" address add fdd4:96cf:4eae:443b::1/64 \ + dev ${parent}0 nodad + ip 
-netns "${NS1}" address add fdd4:96cf:4eae:443b::2/64 \ + dev ${parent}1 nodad fi } @@ -192,10 +208,10 @@ verify() { ping_dst="198.19.0.3" # Generates ARPs which are not IPv4/IPv6 fi if [ "$tos_ttl" = "inherit" ]; then - ping -i 0.1 $ping_dst -Q "$expected_tos" -t "$expected_ttl" \ - 2>/dev/null 1>&2 & ping_pid="$!" + ${RUN_NS0} ping -i 0.1 $ping_dst -Q "$expected_tos" \ + -t "$expected_ttl" 2>/dev/null 1>&2 & ping_pid="$!" else - ping -i 0.1 $ping_dst 2>/dev/null 1>&2 & ping_pid="$!" + ${RUN_NS0} ping -i 0.1 $ping_dst 2>/dev/null 1>&2 & ping_pid="$!" fi local tunnel_type_offset tunnel_type_proto req_proto_offset req_offset if [ "$type" = "gre" ]; then @@ -216,10 +232,12 @@ verify() { req_proto_offset="$((req_proto_offset + 4))" req_offset="$((req_offset + 4))" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip[$tunnel_type_offset] = $tunnel_type_proto and \ - ip[$req_proto_offset] = 0x01 and \ - ip[$req_offset] = 0x08 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip[$tunnel_type_offset] = $tunnel_type_proto and \ + ip[$req_proto_offset] = 0x01 and \ + ip[$req_offset] = 0x08 2>/dev/null \ + | head -n 1)" elif [ "$inner" = "6" ]; then req_proto_offset="44" req_offset="78" @@ -231,10 +249,12 @@ verify() { req_proto_offset="$((req_proto_offset + 4))" req_offset="$((req_offset + 4))" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip[$tunnel_type_offset] = $tunnel_type_proto and \ - ip[$req_proto_offset] = 0x3a and \ - ip[$req_offset] = 0x80 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip[$tunnel_type_offset] = $tunnel_type_proto and \ + ip[$req_proto_offset] = 0x3a and \ + ip[$req_offset] = 0x80 2>/dev/null \ + | head -n 1)" elif [ "$inner" = "other" ]; then req_proto_offset="36" req_offset="45" @@ -250,11 +270,13 @@ verify() { expected_tos="0x00" expected_ttl="64" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ 
- ip[$tunnel_type_offset] = $tunnel_type_proto and \ - ip[$req_proto_offset] = 0x08 and \ - ip[$((req_proto_offset + 1))] = 0x06 and \ - ip[$req_offset] = 0x01 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip[$tunnel_type_offset] = $tunnel_type_proto and \ + ip[$req_proto_offset] = 0x08 and \ + ip[$((req_proto_offset + 1))] = 0x06 and \ + ip[$req_offset] = 0x01 2>/dev/null \ + | head -n 1)" fi elif [ "$outer" = "6" ]; then if [ "$type" = "gre" ]; then @@ -273,10 +295,12 @@ verify() { req_proto_offset="$((req_proto_offset + 4))" req_offset="$((req_offset + 4))" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip6[$tunnel_type_offset] = $tunnel_type_proto and \ - ip6[$req_proto_offset] = 0x01 and \ - ip6[$req_offset] = 0x08 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip6[$tunnel_type_offset] = $tunnel_type_proto and \ + ip6[$req_proto_offset] = 0x01 and \ + ip6[$req_offset] = 0x08 2>/dev/null \ + | head -n 1)" elif [ "$inner" = "6" ]; then local req_proto_offset="72" local req_offset="106" @@ -288,10 +312,12 @@ verify() { req_proto_offset="$((req_proto_offset + 4))" req_offset="$((req_offset + 4))" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip6[$tunnel_type_offset] = $tunnel_type_proto and \ - ip6[$req_proto_offset] = 0x3a and \ - ip6[$req_offset] = 0x80 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip6[$tunnel_type_offset] = $tunnel_type_proto and \ + ip6[$req_proto_offset] = 0x3a and \ + ip6[$req_offset] = 0x80 2>/dev/null \ + | head -n 1)" elif [ "$inner" = "other" ]; then local req_proto_offset="64" local req_offset="73" @@ -307,15 +333,17 @@ verify() { expected_tos="0x00" expected_ttl="64" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip6[$tunnel_type_offset] = $tunnel_type_proto and \ - ip6[$req_proto_offset] = 0x08 and \ - 
ip6[$((req_proto_offset + 1))] = 0x06 and \ - ip6[$req_offset] = 0x01 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip6[$tunnel_type_offset] = $tunnel_type_proto and \ + ip6[$req_proto_offset] = 0x08 and \ + ip6[$((req_proto_offset + 1))] = 0x06 and \ + ip6[$req_offset] = 0x01 2>/dev/null \ + | head -n 1)" fi fi kill -9 $ping_pid - wait $ping_pid 2>/dev/null + wait $ping_pid 2>/dev/null || true result="FAIL" if [ "$outer" = "4" ]; then captured_ttl="$(get_field "ttl" "$out")" @@ -351,11 +379,35 @@ verify() { } cleanup() { - ip link del veth0 2>/dev/null - ip netns del testing 2>/dev/null - ip link del tep0 2>/dev/null + ip netns del "${NS0}" 2>/dev/null + ip netns del "${NS1}" 2>/dev/null } +exit_handler() { + # Don't exit immediately if one of the intermediate commands fails. + # We might be called at the end of the script, when the network + # namespaces have already been deleted. So cleanup() may fail, but we + # still need to run until 'exit $ERR' or the script won't return the + # correct error code. + set +e + + cleanup + + exit $ERR +} + +# Restore the default SIGINT handler (just in case) and exit. +# The exit handler will take care of cleaning everything up. +interrupted() { + trap - INT + + exit $ERR +} + +set -e +trap exit_handler EXIT +trap interrupted INT + printf "┌────────┬───────┬───────┬──────────────┬" printf "──────────────┬───────┬────────┐\n" for type in gre vxlan geneve; do @@ -385,6 +437,10 @@ done printf "└────────┴───────┴───────┴──────────────┴" printf "──────────────┴───────┴────────┘\n" +# All tests done. +# Set ERR appropriately: it will be returned by the exit handler. if $failed; then - exit 1 + ERR=1 +else + ERR=0 fi |