diff options
author | Palmer Dabbelt <palmer@rivosinc.com> | 2024-09-12 16:23:05 +0200 |
---|---|---|
committer | Palmer Dabbelt <palmer@rivosinc.com> | 2024-09-12 16:23:05 +0200 |
commit | 9ea7b92b77df7d2eee3c31ef4a19f0f12ec74190 (patch) | |
tree | d1963d8effd4e52dc7e31bdf2e066eb0c2d1e0dc | |
parent | riscv: Add license to vmalloc.h (diff) | |
parent | riscv: remove limit on the size of read-only section for XIP kernel (diff) | |
download | linux-9ea7b92b77df7d2eee3c31ef4a19f0f12ec74190.tar.xz linux-9ea7b92b77df7d2eee3c31ef4a19f0f12ec74190.zip |
Merge patch series "remove size limit on XIP kernel"
Nam Cao <namcao@linutronix.de> says:
Hi,
For XIP kernel, the writable data section is always at offset specified in
XIP_OFFSET, which is hard-coded to 32MB.
Unfortunately, this means the read-only section (placed before the
writable section) is restricted in size. This causes build failure if the
kernel gets too large.
This series remove the use of XIP_OFFSET one by one, then remove this
macro entirely at the end, with the goal of lifting this size restriction.
Also some cleanup and documentation along the way.
* b4-shazam-merge
riscv: remove limit on the size of read-only section for XIP kernel
riscv: drop the use of XIP_OFFSET in create_kernel_page_table()
riscv: drop the use of XIP_OFFSET in kernel_mapping_va_to_pa()
riscv: drop the use of XIP_OFFSET in XIP_FIXUP_FLASH_OFFSET
riscv: drop the use of XIP_OFFSET in XIP_FIXUP_OFFSET
riscv: replace misleading va_kernel_pa_offset on XIP kernel
riscv: don't export va_kernel_pa_offset in vmcoreinfo for XIP kernel
riscv: cleanup XIP_FIXUP macro
riscv: change XIP's kernel_map.size to be size of the entire kernel
...
Link: https://lore.kernel.org/r/cover.1717789719.git.namcao@linutronix.de
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
314 files changed, 3896 insertions, 2275 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index f1384c7b59c9..09126bb8cc9f 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4798,11 +4798,9 @@ profile= [KNL] Enable kernel profiling via /proc/profile Format: [<profiletype>,]<number> - Param: <profiletype>: "schedule", "sleep", or "kvm" + Param: <profiletype>: "schedule" or "kvm" [defaults to kernel profiling] Param: "schedule" - profile schedule points. - Param: "sleep" - profile D-state sleeping (millisecs). - Requires CONFIG_SCHEDSTATS Param: "kvm" - profile VM exits. Param: <number> - step/bucket size as a power of 2 for statistical time based profiling. diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index bb83c5d8c675..50327c05be8d 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -122,10 +122,18 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A76 | #1490853 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A76 | #3324349 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A77 | #1491015 | N/A | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A77 | #1508412 | ARM64_ERRATUM_1508412 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A77 | #3324348 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A78 | #3324344 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A78C | #3324346,3324347| ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 | @@ -138,8 +146,14 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A720 | #3456091 | ARM64_ERRATUM_3194386 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A725 | #3456106 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-X1 | #1502854 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-X1 | #3324344 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-X1C | #3324346 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-X2 | #2119858 | ARM64_ERRATUM_2119858 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-X2 | #2224489 | ARM64_ERRATUM_2224489 | @@ -160,6 +174,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1542419 | ARM64_ERRATUM_1542419 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Neoverse-N1 | #3324349 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N2 | #2139208 | ARM64_ERRATUM_2139208 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N2 | #2067961 | ARM64_ERRATUM_2067961 | @@ -170,6 +186,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-V1 | #1619801 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Neoverse-V1 | #3324341 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-V2 | #3324336 | ARM64_ERRATUM_3194386 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-V3 | #3312417 | ARM64_ERRATUM_3194386 | diff --git a/Documentation/devicetree/bindings/ata/rockchip,dwc-ahci.yaml b/Documentation/devicetree/bindings/ata/rockchip,dwc-ahci.yaml index b5e5767d8698..13eaa8d9a16e 100644 --- a/Documentation/devicetree/bindings/ata/rockchip,dwc-ahci.yaml +++ b/Documentation/devicetree/bindings/ata/rockchip,dwc-ahci.yaml @@ -35,6 +35,9 @@ properties: ports-implemented: const: 1 + power-domains: + maxItems: 1 + sata-port@0: $ref: /schemas/ata/snps,dwc-ahci-common.yaml#/$defs/dwc-ahci-port diff --git a/Documentation/driver-api/thermal/sysfs-api.rst b/Documentation/driver-api/thermal/sysfs-api.rst index 6c1175c6afba..978198f8a18b 100644 --- a/Documentation/driver-api/thermal/sysfs-api.rst +++ b/Documentation/driver-api/thermal/sysfs-api.rst @@ -4,8 +4,6 @@ Generic Thermal Sysfs driver How To Written by Sujith Thomas <sujith.thomas@intel.com>, Zhang Rui <rui.zhang@intel.com> -Updated: 2 January 2008 - Copyright (c) 2008 Intel Corporation @@ -38,23 +36,23 @@ temperature) and throttle appropriate devices. :: - struct thermal_zone_device - *thermal_zone_device_register(char *type, - int trips, int mask, void *devdata, - struct thermal_zone_device_ops *ops, - const struct thermal_zone_params *tzp, - int passive_delay, int polling_delay)) + struct thermal_zone_device * + thermal_zone_device_register_with_trips(const char *type, + const struct thermal_trip *trips, + int num_trips, void *devdata, + const struct thermal_zone_device_ops *ops, + const struct thermal_zone_params *tzp, + unsigned int passive_delay, + unsigned int polling_delay) - This interface function adds a new thermal zone device (sensor) to + This interface function adds a new thermal zone device (sensor) to the /sys/class/thermal folder as `thermal_zone[0-*]`. It tries to bind all the - thermal cooling devices registered at the same time. + thermal cooling devices registered to it at the same time. type: the thermal zone type. trips: - the total number of trip points this thermal zone supports. - mask: - Bit string: If 'n'th bit is set, then trip point 'n' is writable. + the table of trip points for this thermal zone. devdata: device private data ops: @@ -67,32 +65,29 @@ temperature) and throttle appropriate devices. .get_temp: get the current temperature of the thermal zone. .set_trips: - set the trip points window. Whenever the current temperature - is updated, the trip points immediately below and above the - current temperature are found. - .get_mode: - get the current mode (enabled/disabled) of the thermal zone. - - - "enabled" means the kernel thermal management is - enabled. - - "disabled" will prevent kernel thermal driver action - upon trip points so that user applications can take - charge of thermal management. - .set_mode: - set the mode (enabled/disabled) of the thermal zone. - .get_trip_type: - get the type of certain trip point. - .get_trip_temp: - get the temperature above which the certain trip point - will be fired. + set the trip points window. Whenever the current temperature + is updated, the trip points immediately below and above the + current temperature are found. + .change_mode: + change the mode (enabled/disabled) of the thermal zone. + .set_trip_temp: + set the temperature of a given trip point. + .get_crit_temp: + get the critical temperature for this thermal zone. .set_emul_temp: - set the emulation temperature which helps in debugging - different threshold temperature points. + set the emulation temperature which helps in debugging + different threshold temperature points. + .get_trend: + get the trend of most recent zone temperature changes. + .hot: + hot trip point crossing handler. + .critical: + critical trip point crossing handler. tzp: thermal zone platform parameters. passive_delay: - number of milliseconds to wait between polls when - performing passive cooling. + number of milliseconds to wait between polls when performing passive + cooling. polling_delay: number of milliseconds to wait between polls when checking whether trip points have been crossed (0 for interrupt driven systems). diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 495e35fcfb21..ea21fe135b97 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -1753,6 +1753,7 @@ operations: request: attributes: - header + - context reply: attributes: - header @@ -1761,7 +1762,6 @@ operations: - indir - hkey - input_xfrm - dump: *rss-get-op - name: plca-get-cfg doc: Get PLCA params. diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 3ab423b80e91..d5f246aceb9f 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -1875,6 +1875,7 @@ Kernel response contents: ===================================== ====== ========================== ``ETHTOOL_A_RSS_HEADER`` nested reply header + ``ETHTOOL_A_RSS_CONTEXT`` u32 context number ``ETHTOOL_A_RSS_HFUNC`` u32 RSS hash func ``ETHTOOL_A_RSS_INDIR`` binary Indir table bytes ``ETHTOOL_A_RSS_HKEY`` binary Hash key bytes diff --git a/Documentation/userspace-api/media/v4l/pixfmt-yuv-luma.rst b/Documentation/userspace-api/media/v4l/pixfmt-yuv-luma.rst index f02e6cf3516a..74df19be91f6 100644 --- a/Documentation/userspace-api/media/v4l/pixfmt-yuv-luma.rst +++ b/Documentation/userspace-api/media/v4l/pixfmt-yuv-luma.rst @@ -21,9 +21,9 @@ are often referred to as greyscale formats. .. raw:: latex - \scriptsize + \tiny -.. tabularcolumns:: |p{3.6cm}|p{3.0cm}|p{1.3cm}|p{2.6cm}|p{1.3cm}|p{1.3cm}|p{1.3cm}| +.. tabularcolumns:: |p{3.6cm}|p{2.4cm}|p{1.3cm}|p{1.3cm}|p{1.3cm}|p{1.3cm}|p{1.3cm}|p{1.3cm}|p{1.3cm}| .. flat-table:: Luma-Only Image Formats :header-rows: 1 diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index fe722c5dada9..33938468d62d 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6368,7 +6368,7 @@ a single guest_memfd file, but the bound ranges must not overlap). See KVM_SET_USER_MEMORY_REGION2 for additional details. 4.143 KVM_PRE_FAULT_MEMORY ------------------------- +--------------------------- :Capability: KVM_CAP_PRE_FAULT_MEMORY :Architectures: none @@ -6405,6 +6405,12 @@ for the current vCPU state. KVM maps memory as if the vCPU generated a stage-2 read page fault, e.g. faults in memory as needed, but doesn't break CoW. However, KVM does not mark any newly created stage-2 PTE as Accessed. +In the case of confidential VM types where there is an initial set up of +private guest memory before the guest is 'finalized'/measured, this ioctl +should only be issued after completing all the necessary setup to put the +guest into a 'finalized' state so that the above semantics can be reliably +ensured. + In some cases, multiple vCPUs might share the page tables. In this case, the ioctl can be called in parallel. diff --git a/MAINTAINERS b/MAINTAINERS index 42decde38320..8766f3e5e87e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15936,6 +15936,7 @@ F: include/linux/in.h F: include/linux/indirect_call_wrapper.h F: include/linux/net.h F: include/linux/netdevice.h +F: include/linux/skbuff.h F: include/net/ F: include/uapi/linux/in.h F: include/uapi/linux/net.h @@ -18556,7 +18557,7 @@ F: drivers/usb/misc/qcom_eud.c QCOM IPA DRIVER M: Alex Elder <elder@kernel.org> L: netdev@vger.kernel.org -S: Supported +S: Maintained F: drivers/net/ipa/ QEMU MACHINE EMULATOR AND VIRTUALIZER SUPPORT @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Baby Opossum Posse # *DOCUMENTATION* diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h index 2bb8cbeedf91..b191d87f89c4 100644 --- a/arch/alpha/include/asm/io.h +++ b/arch/alpha/include/asm/io.h @@ -534,8 +534,10 @@ extern inline void writeq(u64 b, volatile void __iomem *addr) #define ioread16be(p) swab16(ioread16(p)) #define ioread32be(p) swab32(ioread32(p)) +#define ioread64be(p) swab64(ioread64(p)) #define iowrite16be(v,p) iowrite16(swab16(v), (p)) #define iowrite32be(v,p) iowrite32(swab32(v), (p)) +#define iowrite64be(v,p) iowrite64(swab64(v), (p)) #define inb_p inb #define inw_p inw @@ -634,8 +636,6 @@ extern void outsl (unsigned long port, const void *src, unsigned long count); */ #define ioread64 ioread64 #define iowrite64 iowrite64 -#define ioread64be ioread64be -#define iowrite64be iowrite64be #define ioread8_rep ioread8_rep #define ioread16_rep ioread16_rep #define ioread32_rep ioread32_rep diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 954a1916a500..54b2bb817a7f 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -87,6 +87,7 @@ config ARM select HAVE_ARCH_PFN_VALID select HAVE_ARCH_SECCOMP select HAVE_ARCH_SECCOMP_FILTER if AEABI && !OABI_COMPAT + select HAVE_ARCH_STACKLEAK select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if ARM_LPAE @@ -116,6 +117,7 @@ config ARM select HAVE_KERNEL_XZ select HAVE_KPROBES if !XIP_KERNEL && !CPU_ENDIAN_BE32 && !CPU_V7M select HAVE_KRETPROBES if HAVE_KPROBES + select HAVE_LD_DEAD_CODE_DATA_ELIMINATION select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI select HAVE_OPTPROBES if !THUMB2_KERNEL @@ -736,7 +738,7 @@ config ARM_ERRATA_764319 bool "ARM errata: Read to DBGPRSR and DBGOSLSR may generate Undefined instruction" depends on CPU_V7 help - This option enables the workaround for the 764319 Cortex A-9 erratum. + This option enables the workaround for the 764319 Cortex-A9 erratum. CP14 read accesses to the DBGPRSR and DBGOSLSR registers generate an unexpected Undefined Instruction exception when the DBGSWENABLE external pin is set to 0, even when the CP14 accesses are performed diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 6bca03c0c7f0..945b5975fce2 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -9,6 +9,7 @@ OBJS = HEAD = head.o OBJS += misc.o decompress.o +CFLAGS_decompress.o += $(DISABLE_STACKLEAK_PLUGIN) ifeq ($(CONFIG_DEBUG_UNCOMPRESS),y) OBJS += debug.o AFLAGS_head.o += -DDEBUG diff --git a/arch/arm/boot/compressed/vmlinux.lds.S b/arch/arm/boot/compressed/vmlinux.lds.S index 3fcb3e62dc56..d411abd4310e 100644 --- a/arch/arm/boot/compressed/vmlinux.lds.S +++ b/arch/arm/boot/compressed/vmlinux.lds.S @@ -125,7 +125,7 @@ SECTIONS . = BSS_START; __bss_start = .; - .bss : { *(.bss) } + .bss : { *(.bss .bss.*) } _end = .; . = ALIGN(8); /* the stack must be 64-bit aligned */ diff --git a/arch/arm/boot/dts/arm/versatile-ab.dts b/arch/arm/boot/dts/arm/versatile-ab.dts index 6fe6b49f5d8e..635ab9268899 100644 --- a/arch/arm/boot/dts/arm/versatile-ab.dts +++ b/arch/arm/boot/dts/arm/versatile-ab.dts @@ -157,7 +157,7 @@ clocks = <&xtal24mhz>; }; - pclk: clock-24000000 { + pclk: clock-pclk { #clock-cells = <0>; compatible = "fixed-factor-clock"; clock-div = <1>; diff --git a/arch/arm/include/asm/stacktrace.h b/arch/arm/include/asm/stacktrace.h index 360f0d2406bf..f80a85b091d6 100644 --- a/arch/arm/include/asm/stacktrace.h +++ b/arch/arm/include/asm/stacktrace.h @@ -26,6 +26,13 @@ struct stackframe { #endif }; +static inline bool on_thread_stack(void) +{ + unsigned long delta = current_stack_pointer ^ (unsigned long)current->stack; + + return delta < THREAD_SIZE; +} + static __always_inline void arm_get_current_stackframe(struct pt_regs *regs, struct stackframe *frame) { diff --git a/arch/arm/include/asm/vmlinux.lds.h b/arch/arm/include/asm/vmlinux.lds.h index 4c8632d5c432..d60f6e83a9f7 100644 --- a/arch/arm/include/asm/vmlinux.lds.h +++ b/arch/arm/include/asm/vmlinux.lds.h @@ -42,7 +42,7 @@ #define PROC_INFO \ . = ALIGN(4); \ __proc_info_begin = .; \ - *(.proc.info.init) \ + KEEP(*(.proc.info.init)) \ __proc_info_end = .; #define IDMAP_TEXT \ diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 6150a716828c..f01d23a220e6 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -1065,6 +1065,7 @@ vector_addrexcptn: .globl vector_fiq .section .vectors, "ax", %progbits + .reloc .text, R_ARM_NONE, . W(b) vector_rst W(b) vector_und ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_swi ) @@ -1078,6 +1079,7 @@ THUMB( .reloc ., R_ARM_THM_PC12, .L__vector_swi ) #ifdef CONFIG_HARDEN_BRANCH_HISTORY .section .vectors.bhb.loop8, "ax", %progbits + .reloc .text, R_ARM_NONE, . W(b) vector_rst W(b) vector_bhb_loop8_und ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_bhb_loop8_swi ) @@ -1090,6 +1092,7 @@ THUMB( .reloc ., R_ARM_THM_PC12, .L__vector_bhb_loop8_swi ) W(b) vector_bhb_loop8_fiq .section .vectors.bhb.bpiall, "ax", %progbits + .reloc .text, R_ARM_NONE, . W(b) vector_rst W(b) vector_bhb_bpiall_und ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_bhb_bpiall_swi ) diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 5c31e9de7a60..f379c852dcb7 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -119,6 +119,9 @@ no_work_pending: ct_user_enter save = 0 +#ifdef CONFIG_GCC_PLUGIN_STACKLEAK + bl stackleak_erase_on_task_stack +#endif restore_user_regs fast = 0, offset = 0 ENDPROC(ret_to_user_from_irq) ENDPROC(ret_to_user) diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index 677f218f7e84..da488d92e7a0 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -395,11 +395,6 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, return 0; } -struct mod_unwind_map { - const Elf_Shdr *unw_sec; - const Elf_Shdr *txt_sec; -}; - static const Elf_Shdr *find_mod_section(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs, const char *name) { diff --git a/arch/arm/kernel/perf_callchain.c b/arch/arm/kernel/perf_callchain.c index 7147edbe56c6..1d230ac9d0eb 100644 --- a/arch/arm/kernel/perf_callchain.c +++ b/arch/arm/kernel/perf_callchain.c @@ -85,8 +85,7 @@ static bool callchain_trace(void *data, unsigned long pc) { struct perf_callchain_entry_ctx *entry = data; - perf_callchain_store(entry, pc); - return true; + return perf_callchain_store(entry, pc) == 0; } void diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S index c16d196b5aad..5eddb75a7174 100644 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ b/arch/arm/kernel/vmlinux-xip.lds.S @@ -63,7 +63,7 @@ SECTIONS . = ALIGN(4); __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { __start___ex_table = .; - ARM_MMU_KEEP(*(__ex_table)) + ARM_MMU_KEEP(KEEP(*(__ex_table))) __stop___ex_table = .; } @@ -83,7 +83,7 @@ SECTIONS } .init.arch.info : { __arch_info_begin = .; - *(.arch.info.init) + KEEP(*(.arch.info.init)) __arch_info_end = .; } .init.tagtable : { diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index bd9127c4b451..de373c6c2ae8 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -74,7 +74,7 @@ SECTIONS . = ALIGN(4); __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { __start___ex_table = .; - ARM_MMU_KEEP(*(__ex_table)) + ARM_MMU_KEEP(KEEP(*(__ex_table))) __stop___ex_table = .; } @@ -99,7 +99,7 @@ SECTIONS } .init.arch.info : { __arch_info_begin = .; - *(.arch.info.init) + KEEP(*(.arch.info.init)) __arch_info_end = .; } .init.tagtable : { @@ -116,7 +116,7 @@ SECTIONS #endif .init.pv_table : { __pv_table_begin = .; - *(.pv_table) + KEEP(*(.pv_table)) __pv_table_end = .; } diff --git a/arch/arm/mach-alpine/alpine_cpu_pm.c b/arch/arm/mach-alpine/alpine_cpu_pm.c index 13ae8412e9ce..b48da6f12b6c 100644 --- a/arch/arm/mach-alpine/alpine_cpu_pm.c +++ b/arch/arm/mach-alpine/alpine_cpu_pm.c @@ -29,7 +29,7 @@ int alpine_cpu_wakeup(unsigned int phys_cpu, uint32_t phys_resume_addr) /* * Set CPU resume address - * secure firmware running on boot will jump to this address - * after setting proper CPU mode, and initialiing e.g. secure + * after setting proper CPU mode, and initializing e.g. secure * regs (the same mode all CPUs are booted to - usually HYP) */ writel(phys_resume_addr, diff --git a/arch/arm/mm/proc.c b/arch/arm/mm/proc.c index bdbbf65d1b36..2027845efefb 100644 --- a/arch/arm/mm/proc.c +++ b/arch/arm/mm/proc.c @@ -17,7 +17,7 @@ void cpu_arm7tdmi_proc_init(void); __ADDRESSABLE(cpu_arm7tdmi_proc_init); void cpu_arm7tdmi_proc_fin(void); __ADDRESSABLE(cpu_arm7tdmi_proc_fin); -void cpu_arm7tdmi_reset(void); +void cpu_arm7tdmi_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm7tdmi_reset); int cpu_arm7tdmi_do_idle(void); __ADDRESSABLE(cpu_arm7tdmi_do_idle); @@ -32,7 +32,7 @@ void cpu_arm720_proc_init(void); __ADDRESSABLE(cpu_arm720_proc_init); void cpu_arm720_proc_fin(void); __ADDRESSABLE(cpu_arm720_proc_fin); -void cpu_arm720_reset(void); +void cpu_arm720_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm720_reset); int cpu_arm720_do_idle(void); __ADDRESSABLE(cpu_arm720_do_idle); @@ -49,7 +49,7 @@ void cpu_arm740_proc_init(void); __ADDRESSABLE(cpu_arm740_proc_init); void cpu_arm740_proc_fin(void); __ADDRESSABLE(cpu_arm740_proc_fin); -void cpu_arm740_reset(void); +void cpu_arm740_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm740_reset); int cpu_arm740_do_idle(void); __ADDRESSABLE(cpu_arm740_do_idle); @@ -64,7 +64,7 @@ void cpu_arm9tdmi_proc_init(void); __ADDRESSABLE(cpu_arm9tdmi_proc_init); void cpu_arm9tdmi_proc_fin(void); __ADDRESSABLE(cpu_arm9tdmi_proc_fin); -void cpu_arm9tdmi_reset(void); +void cpu_arm9tdmi_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm9tdmi_reset); int cpu_arm9tdmi_do_idle(void); __ADDRESSABLE(cpu_arm9tdmi_do_idle); @@ -79,7 +79,7 @@ void cpu_arm920_proc_init(void); __ADDRESSABLE(cpu_arm920_proc_init); void cpu_arm920_proc_fin(void); __ADDRESSABLE(cpu_arm920_proc_fin); -void cpu_arm920_reset(void); +void cpu_arm920_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm920_reset); int cpu_arm920_do_idle(void); __ADDRESSABLE(cpu_arm920_do_idle); @@ -102,7 +102,7 @@ void cpu_arm922_proc_init(void); __ADDRESSABLE(cpu_arm922_proc_init); void cpu_arm922_proc_fin(void); __ADDRESSABLE(cpu_arm922_proc_fin); -void cpu_arm922_reset(void); +void cpu_arm922_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm922_reset); int cpu_arm922_do_idle(void); __ADDRESSABLE(cpu_arm922_do_idle); @@ -119,7 +119,7 @@ void cpu_arm925_proc_init(void); __ADDRESSABLE(cpu_arm925_proc_init); void cpu_arm925_proc_fin(void); __ADDRESSABLE(cpu_arm925_proc_fin); -void cpu_arm925_reset(void); +void cpu_arm925_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm925_reset); int cpu_arm925_do_idle(void); __ADDRESSABLE(cpu_arm925_do_idle); @@ -159,7 +159,7 @@ void cpu_arm940_proc_init(void); __ADDRESSABLE(cpu_arm940_proc_init); void cpu_arm940_proc_fin(void); __ADDRESSABLE(cpu_arm940_proc_fin); -void cpu_arm940_reset(void); +void cpu_arm940_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm940_reset); int cpu_arm940_do_idle(void); __ADDRESSABLE(cpu_arm940_do_idle); @@ -174,7 +174,7 @@ void cpu_arm946_proc_init(void); __ADDRESSABLE(cpu_arm946_proc_init); void cpu_arm946_proc_fin(void); __ADDRESSABLE(cpu_arm946_proc_fin); -void cpu_arm946_reset(void); +void cpu_arm946_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm946_reset); int cpu_arm946_do_idle(void); __ADDRESSABLE(cpu_arm946_do_idle); @@ -429,7 +429,7 @@ void cpu_v7_proc_init(void); __ADDRESSABLE(cpu_v7_proc_init); void cpu_v7_proc_fin(void); __ADDRESSABLE(cpu_v7_proc_fin); -void cpu_v7_reset(void); +void cpu_v7_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_v7_reset); int cpu_v7_do_idle(void); __ADDRESSABLE(cpu_v7_do_idle); diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b3fc891f1544..a2f8ff354ca6 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1069,18 +1069,28 @@ config ARM64_ERRATUM_3117295 If unsure, say Y. config ARM64_ERRATUM_3194386 - bool "Cortex-{A720,X4,X925}/Neoverse-V3: workaround for MSR SSBS not self-synchronizing" + bool "Cortex-*/Neoverse-*: workaround for MSR SSBS not self-synchronizing" default y help This option adds the workaround for the following errata: + * ARM Cortex-A76 erratum 3324349 + * ARM Cortex-A77 erratum 3324348 + * ARM Cortex-A78 erratum 3324344 + * ARM Cortex-A78C erratum 3324346 + * ARM Cortex-A78C erratum 3324347 * ARM Cortex-A710 erratam 3324338 * ARM Cortex-A720 erratum 3456091 + * ARM Cortex-A725 erratum 3456106 + * ARM Cortex-X1 erratum 3324344 + * ARM Cortex-X1C erratum 3324346 * ARM Cortex-X2 erratum 3324338 * ARM Cortex-X3 erratum 3324335 * ARM Cortex-X4 erratum 3194386 * ARM Cortex-X925 erratum 3324334 + * ARM Neoverse-N1 erratum 3324349 * ARM Neoverse N2 erratum 3324339 + * ARM Neoverse-V1 erratum 3324341 * ARM Neoverse V2 erratum 3324336 * ARM Neoverse-V3 erratum 3312417 @@ -1088,11 +1098,11 @@ config ARM64_ERRATUM_3194386 subsequent speculative instructions, which may permit unexepected speculative store bypassing. - Work around this problem by placing a speculation barrier after - kernel changes to SSBS. The presence of the SSBS special-purpose - register is hidden from hwcaps and EL0 reads of ID_AA64PFR1_EL1, such - that userspace will use the PR_SPEC_STORE_BYPASS prctl to change - SSBS. + Work around this problem by placing a Speculation Barrier (SB) or + Instruction Synchronization Barrier (ISB) after kernel changes to + SSBS. The presence of the SSBS special-purpose register is hidden + from hwcaps and EL0 reads of ID_AA64PFR1_EL1, such that userspace + will use the PR_SPEC_STORE_BYPASS prctl to change SSBS. If unsure, say Y. diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 1cb0704c6163..5fd7caea4419 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -86,12 +86,14 @@ #define ARM_CPU_PART_CORTEX_X2 0xD48 #define ARM_CPU_PART_NEOVERSE_N2 0xD49 #define ARM_CPU_PART_CORTEX_A78C 0xD4B +#define ARM_CPU_PART_CORTEX_X1C 0xD4C #define ARM_CPU_PART_CORTEX_X3 0xD4E #define ARM_CPU_PART_NEOVERSE_V2 0xD4F #define ARM_CPU_PART_CORTEX_A720 0xD81 #define ARM_CPU_PART_CORTEX_X4 0xD82 #define ARM_CPU_PART_NEOVERSE_V3 0xD84 #define ARM_CPU_PART_CORTEX_X925 0xD85 +#define ARM_CPU_PART_CORTEX_A725 0xD87 #define APM_CPU_PART_XGENE 0x000 #define APM_CPU_VAR_POTENZA 0x00 @@ -165,12 +167,14 @@ #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) #define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) #define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C) +#define MIDR_CORTEX_X1C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1C) #define MIDR_CORTEX_X3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X3) #define MIDR_NEOVERSE_V2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V2) #define MIDR_CORTEX_A720 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A720) #define MIDR_CORTEX_X4 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X4) #define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3) #define MIDR_CORTEX_X925 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X925) +#define MIDR_CORTEX_A725 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A725) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index 4e753908b801..a0a5bbae7229 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -13,6 +13,7 @@ #include <linux/types.h> #include <asm/insn.h> +#define HAVE_JUMP_LABEL_BATCH #define JUMP_LABEL_NOP_SIZE AARCH64_INSN_SIZE #define JUMP_TABLE_ENTRY(key, label) \ diff --git a/arch/arm64/kernel/Makefile.syscalls b/arch/arm64/kernel/Makefile.syscalls index 3cfafd003b2d..0542a718871a 100644 --- a/arch/arm64/kernel/Makefile.syscalls +++ b/arch/arm64/kernel/Makefile.syscalls @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 syscall_abis_32 += -syscall_abis_64 += renameat newstat rlimit memfd_secret +syscall_abis_64 += renameat rlimit memfd_secret syscalltbl = arch/arm64/tools/syscall_%.tbl diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 617424b73f8c..f6b6b4507357 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -434,15 +434,24 @@ static const struct midr_range erratum_spec_unpriv_load_list[] = { #ifdef CONFIG_ARM64_ERRATUM_3194386 static const struct midr_range erratum_spec_ssbs_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A76), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A77), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), MIDR_ALL_VERSIONS(MIDR_CORTEX_A720), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A725), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C), MIDR_ALL_VERSIONS(MIDR_CORTEX_X2), MIDR_ALL_VERSIONS(MIDR_CORTEX_X3), MIDR_ALL_VERSIONS(MIDR_CORTEX_X4), MIDR_ALL_VERSIONS(MIDR_CORTEX_X925), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), - MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3), {} }; #endif diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c index faf88ec9c48e..f63ea915d6ad 100644 --- a/arch/arm64/kernel/jump_label.c +++ b/arch/arm64/kernel/jump_label.c @@ -7,11 +7,12 @@ */ #include <linux/kernel.h> #include <linux/jump_label.h> +#include <linux/smp.h> #include <asm/insn.h> #include <asm/patching.h> -void arch_jump_label_transform(struct jump_entry *entry, - enum jump_label_type type) +bool arch_jump_label_transform_queue(struct jump_entry *entry, + enum jump_label_type type) { void *addr = (void *)jump_entry_code(entry); u32 insn; @@ -25,4 +26,10 @@ void arch_jump_label_transform(struct jump_entry *entry, } aarch64_insn_patch_text_nosync(addr, insn); + return true; +} + +void arch_jump_label_transform_apply(void) +{ + kick_all_cpus_sync(); } diff --git a/arch/loongarch/kernel/Makefile.syscalls b/arch/loongarch/kernel/Makefile.syscalls index 523bb411a3bc..ab7d9baa2915 100644 --- a/arch/loongarch/kernel/Makefile.syscalls +++ b/arch/loongarch/kernel/Makefile.syscalls @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -syscall_abis_64 += newstat +# No special ABIs on loongarch so far +syscall_abis_64 += diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 5d650e02cbf4..b0a2ac3ba916 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -20,6 +20,7 @@ config PARISC select ARCH_SUPPORTS_HUGETLBFS if PA20 select ARCH_SUPPORTS_MEMORY_FAILURE select ARCH_STACKWALK + select ARCH_HAS_CACHE_LINE_SIZE select ARCH_HAS_DEBUG_VM_PGTABLE select HAVE_RELIABLE_STACKTRACE select DMA_OPS diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h index 2a60d7a72f1f..a3f0f100f219 100644 --- a/arch/parisc/include/asm/cache.h +++ b/arch/parisc/include/asm/cache.h @@ -20,7 +20,16 @@ #define SMP_CACHE_BYTES L1_CACHE_BYTES -#define ARCH_DMA_MINALIGN L1_CACHE_BYTES +#ifdef CONFIG_PA20 +#define ARCH_DMA_MINALIGN 128 +#else +#define ARCH_DMA_MINALIGN 32 +#endif +#define ARCH_KMALLOC_MINALIGN 16 /* ldcw requires 16-byte alignment */ + +#define arch_slab_minalign() ((unsigned)dcache_stride) +#define cache_line_size() dcache_stride +#define dma_get_cache_alignment cache_line_size #define __read_mostly __section(".data..read_mostly") diff --git a/arch/parisc/net/bpf_jit_core.c b/arch/parisc/net/bpf_jit_core.c index 979f45d4d1fb..06cbcd6fe87b 100644 --- a/arch/parisc/net/bpf_jit_core.c +++ b/arch/parisc/net/bpf_jit_core.c @@ -114,7 +114,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) jit_data->header = bpf_jit_binary_alloc(prog_size + extable_size, &jit_data->image, - sizeof(u32), + sizeof(long), bpf_fill_ill_insns); if (!jit_data->header) { prog = orig_prog; diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 7ede2111c591..32d308a3355f 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -112,11 +112,13 @@ struct kernel_mapping { /* Offset between linear mapping virtual address and kernel load address */ unsigned long va_pa_offset; /* Offset between kernel mapping virtual address and kernel load address */ - unsigned long va_kernel_pa_offset; - unsigned long va_kernel_xip_pa_offset; #ifdef CONFIG_XIP_KERNEL + unsigned long va_kernel_xip_text_pa_offset; + unsigned long va_kernel_xip_data_pa_offset; uintptr_t xiprom; uintptr_t xiprom_sz; +#else + unsigned long va_kernel_pa_offset; #endif }; @@ -134,12 +136,18 @@ extern phys_addr_t phys_ram_base; #else void *linear_mapping_pa_to_va(unsigned long x); #endif + +#ifdef CONFIG_XIP_KERNEL #define kernel_mapping_pa_to_va(y) ({ \ unsigned long _y = (unsigned long)(y); \ - (IS_ENABLED(CONFIG_XIP_KERNEL) && _y < phys_ram_base) ? \ - (void *)(_y + kernel_map.va_kernel_xip_pa_offset) : \ - (void *)(_y + kernel_map.va_kernel_pa_offset + XIP_OFFSET); \ + (_y < phys_ram_base) ? \ + (void *)(_y + kernel_map.va_kernel_xip_text_pa_offset) : \ + (void *)(_y + kernel_map.va_kernel_xip_data_pa_offset); \ }) +#else +#define kernel_mapping_pa_to_va(y) ((void *)((unsigned long)(y) + kernel_map.va_kernel_pa_offset)) +#endif + #define __pa_to_va_nodebug(x) linear_mapping_pa_to_va(x) #ifndef CONFIG_DEBUG_VIRTUAL @@ -147,12 +155,17 @@ void *linear_mapping_pa_to_va(unsigned long x); #else phys_addr_t linear_mapping_va_to_pa(unsigned long x); #endif + +#ifdef CONFIG_XIP_KERNEL #define kernel_mapping_va_to_pa(y) ({ \ unsigned long _y = (unsigned long)(y); \ - (IS_ENABLED(CONFIG_XIP_KERNEL) && _y < kernel_map.virt_addr + XIP_OFFSET) ? \ - (_y - kernel_map.va_kernel_xip_pa_offset) : \ - (_y - kernel_map.va_kernel_pa_offset - XIP_OFFSET); \ + (_y < kernel_map.virt_addr + kernel_map.xiprom_sz) ? \ + (_y - kernel_map.va_kernel_xip_text_pa_offset) : \ + (_y - kernel_map.va_kernel_xip_data_pa_offset); \ }) +#else +#define kernel_mapping_va_to_pa(y) ((unsigned long)(y) - kernel_map.va_kernel_pa_offset) +#endif #define __va_to_pa_nodebug(x) ({ \ unsigned long _x = x; \ diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 089f3c9f56a3..ede2f921967c 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -107,13 +107,6 @@ #endif -#ifdef CONFIG_XIP_KERNEL -#define XIP_OFFSET SZ_32M -#define XIP_OFFSET_MASK (SZ_32M - 1) -#else -#define XIP_OFFSET 0 -#endif - #ifndef __ASSEMBLY__ #include <asm/page.h> @@ -142,11 +135,14 @@ #ifdef CONFIG_XIP_KERNEL #define XIP_FIXUP(addr) ({ \ + extern char _sdata[], _start[], _end[]; \ + uintptr_t __rom_start_data = CONFIG_XIP_PHYS_ADDR \ + + (uintptr_t)&_sdata - (uintptr_t)&_start; \ + uintptr_t __rom_end_data = CONFIG_XIP_PHYS_ADDR \ + + (uintptr_t)&_end - (uintptr_t)&_start; \ uintptr_t __a = (uintptr_t)(addr); \ - (__a >= CONFIG_XIP_PHYS_ADDR && \ - __a < CONFIG_XIP_PHYS_ADDR + XIP_OFFSET * 2) ? \ - __a - CONFIG_XIP_PHYS_ADDR + CONFIG_PHYS_RAM_BASE - XIP_OFFSET :\ - __a; \ + (__a >= __rom_start_data && __a < __rom_end_data) ? \ + __a - __rom_start_data + CONFIG_PHYS_RAM_BASE : __a; \ }) #else #define XIP_FIXUP(addr) (addr) diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h index ec11001c3fe0..ab92fc84e1fc 100644 --- a/arch/riscv/include/asm/set_memory.h +++ b/arch/riscv/include/asm/set_memory.h @@ -46,7 +46,7 @@ bool kernel_page_present(struct page *page); #endif /* __ASSEMBLY__ */ -#ifdef CONFIG_STRICT_KERNEL_RWX +#if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_XIP_KERNEL) #ifdef CONFIG_64BIT #define SECTION_ALIGN (1 << 21) #else diff --git a/arch/riscv/include/asm/xip_fixup.h b/arch/riscv/include/asm/xip_fixup.h index b65bf6306f69..f3d56299bc22 100644 --- a/arch/riscv/include/asm/xip_fixup.h +++ b/arch/riscv/include/asm/xip_fixup.h @@ -9,18 +9,36 @@ #ifdef CONFIG_XIP_KERNEL .macro XIP_FIXUP_OFFSET reg - REG_L t0, _xip_fixup + /* Fix-up address in Flash into address in RAM early during boot before + * MMU is up. Because generated code "thinks" data is in Flash, but it + * is actually in RAM (actually data is also in Flash, but Flash is + * read-only, thus we need to use the data residing in RAM). + * + * The start of data in Flash is _sdata and the start of data in RAM is + * CONFIG_PHYS_RAM_BASE. So this fix-up essentially does this: + * reg += CONFIG_PHYS_RAM_BASE - _start + */ + li t0, CONFIG_PHYS_RAM_BASE add \reg, \reg, t0 + la t0, _sdata + sub \reg, \reg, t0 .endm .macro XIP_FIXUP_FLASH_OFFSET reg + /* In linker script, at the transition from read-only section to + * writable section, the VMA is increased while LMA remains the same. + * (See in linker script how _sdata, __data_loc and LOAD_OFFSET is + * changed) + * + * Consequently, early during boot before MMU is up, the generated code + * reads the "writable" section at wrong addresses, because VMA is used + * by compiler to generate code, but the data is located in Flash using + * LMA. + */ + la t0, _sdata + sub \reg, \reg, t0 la t0, __data_loc - REG_L t1, _xip_phys_offset - sub \reg, \reg, t1 add \reg, \reg, t0 .endm - -_xip_fixup: .dword CONFIG_PHYS_RAM_BASE - CONFIG_XIP_PHYS_ADDR - XIP_OFFSET -_xip_phys_offset: .dword CONFIG_XIP_PHYS_ADDR + XIP_OFFSET #else .macro XIP_FIXUP_OFFSET reg .endm diff --git a/arch/riscv/kernel/Makefile.syscalls b/arch/riscv/kernel/Makefile.syscalls index 52087a023b3d..9668fd1faf60 100644 --- a/arch/riscv/kernel/Makefile.syscalls +++ b/arch/riscv/kernel/Makefile.syscalls @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 syscall_abis_32 += riscv memfd_secret -syscall_abis_64 += riscv newstat rlimit memfd_secret +syscall_abis_64 += riscv rlimit memfd_secret diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 8f20607adb40..b427188b28fc 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -432,28 +432,26 @@ static void __init riscv_resolve_isa(unsigned long *source_isa, bitmap_copy(prev_resolved_isa, resolved_isa, RISCV_ISA_EXT_MAX); for_each_set_bit(bit, source_isa, RISCV_ISA_EXT_MAX) { ext = riscv_get_isa_ext_data(bit); - if (!ext) - continue; - if (ext->validate) { + if (ext && ext->validate) { ret = ext->validate(ext, resolved_isa); if (ret == -EPROBE_DEFER) { loop = true; continue; } else if (ret) { /* Disable the extension entirely */ - clear_bit(ext->id, source_isa); + clear_bit(bit, source_isa); continue; } } - set_bit(ext->id, resolved_isa); + set_bit(bit, resolved_isa); /* No need to keep it in source isa now that it is enabled */ - clear_bit(ext->id, source_isa); + clear_bit(bit, source_isa); /* Single letter extensions get set in hwcap */ - if (ext->id < RISCV_ISA_EXT_BASE) - *this_hwcap |= isa2hwcap[ext->id]; + if (bit < RISCV_ISA_EXT_BASE) + *this_hwcap |= isa2hwcap[bit]; } } while (loop && memcmp(prev_resolved_isa, resolved_isa, sizeof(prev_resolved_isa))); } diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index 69e5796fc51f..34ef522f07a8 100644 --- a/arch/riscv/kernel/patch.c +++ b/arch/riscv/kernel/patch.c @@ -205,6 +205,8 @@ int patch_text_set_nosync(void *addr, u8 c, size_t len) int ret; ret = patch_insn_set(addr, c, len); + if (!ret) + flush_icache_range((uintptr_t)addr, (uintptr_t)addr + len); return ret; } @@ -239,6 +241,8 @@ int patch_text_nosync(void *addr, const void *insns, size_t len) int ret; ret = patch_insn_write(addr, insns, len); + if (!ret) + flush_icache_range((uintptr_t)addr, (uintptr_t)addr + len); return ret; } diff --git a/arch/riscv/kernel/sbi-ipi.c b/arch/riscv/kernel/sbi-ipi.c index 1026e22955cc..0cc5559c08d8 100644 --- a/arch/riscv/kernel/sbi-ipi.c +++ b/arch/riscv/kernel/sbi-ipi.c @@ -71,7 +71,7 @@ void __init sbi_ipi_init(void) * the masking/unmasking of virtual IPIs is done * via generic IPI-Mux */ - cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + cpuhp_setup_state(CPUHP_AP_IRQ_RISCV_SBI_IPI_STARTING, "irqchip/sbi-ipi:starting", sbi_ipi_starting_cpu, NULL); diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 05a16b1f0aee..51ebfd23e007 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -319,6 +319,7 @@ void do_trap_ecall_u(struct pt_regs *regs) regs->epc += 4; regs->orig_a0 = regs->a0; + regs->a0 = -ENOSYS; riscv_v_vstate_discard(regs); @@ -328,8 +329,7 @@ void do_trap_ecall_u(struct pt_regs *regs) if (syscall >= 0 && syscall < NR_syscalls) syscall_handler(regs, syscall); - else if (syscall != -1) - regs->a0 = -ENOSYS; + /* * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(), * so the maximum stack offset is 1k bytes (10 bits). diff --git a/arch/riscv/kernel/vmcore_info.c b/arch/riscv/kernel/vmcore_info.c index 6d7a22522d63..d5e448aa90e7 100644 --- a/arch/riscv/kernel/vmcore_info.c +++ b/arch/riscv/kernel/vmcore_info.c @@ -19,6 +19,13 @@ void arch_crash_save_vmcoreinfo(void) #endif #endif vmcoreinfo_append_str("NUMBER(KERNEL_LINK_ADDR)=0x%lx\n", KERNEL_LINK_ADDR); +#ifdef CONFIG_XIP_KERNEL + /* TODO: Communicate with crash-utility developers on the information to + * export. The XIP case is more complicated, because the virtual-physical + * address offset depends on whether the address is in ROM or in RAM. + */ +#else vmcoreinfo_append_str("NUMBER(va_kernel_pa_offset)=0x%lx\n", kernel_map.va_kernel_pa_offset); +#endif } diff --git a/arch/riscv/kernel/vmlinux-xip.lds.S b/arch/riscv/kernel/vmlinux-xip.lds.S index 8c3daa1b0531..a7611789bad5 100644 --- a/arch/riscv/kernel/vmlinux-xip.lds.S +++ b/arch/riscv/kernel/vmlinux-xip.lds.S @@ -14,6 +14,7 @@ #include <asm/page.h> #include <asm/cache.h> #include <asm/thread_info.h> +#include <asm/set_memory.h> OUTPUT_ARCH(riscv) ENTRY(_start) @@ -65,10 +66,10 @@ SECTIONS * From this point, stuff is considered writable and will be copied to RAM */ __data_loc = ALIGN(PAGE_SIZE); /* location in file */ - . = KERNEL_LINK_ADDR + XIP_OFFSET; /* location in memory */ + . = ALIGN(SECTION_ALIGN); /* location in memory */ #undef LOAD_OFFSET -#define LOAD_OFFSET (KERNEL_LINK_ADDR + XIP_OFFSET - (__data_loc & XIP_OFFSET_MASK)) +#define LOAD_OFFSET (KERNEL_LINK_ADDR + _sdata - __data_loc) _sdata = .; /* Start of data section */ _data = .; diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 5224f3733802..a9f2b4af8f3f 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -61,26 +61,27 @@ static inline void no_context(struct pt_regs *regs, unsigned long addr) static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault) { + if (!user_mode(regs)) { + no_context(regs, addr); + return; + } + if (fault & VM_FAULT_OOM) { /* * We ran out of memory, call the OOM killer, and return the userspace * (which will retry the fault, or kill us if we got oom-killed). */ - if (!user_mode(regs)) { - no_context(regs, addr); - return; - } pagefault_out_of_memory(); return; } else if (fault & (VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) { /* Kernel mode? Handle exceptions or die */ - if (!user_mode(regs)) { - no_context(regs, addr); - return; - } do_trap(regs, SIGBUS, BUS_ADRERR, addr); return; + } else if (fault & VM_FAULT_SIGSEGV) { + do_trap(regs, SIGSEGV, SEGV_MAPERR, addr); + return; } + BUG(); } diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 849a7e4daff7..f43139db82a9 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -234,8 +234,6 @@ static void __init setup_bootmem(void) */ memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); - phys_ram_end = memblock_end_of_DRAM(); - /* * Make sure we align the start of the memory on a PMD boundary so that * at worst, we map the linear mapping with PMD mappings. @@ -251,6 +249,16 @@ static void __init setup_bootmem(void) kernel_map.va_pa_offset = PAGE_OFFSET - phys_ram_base; /* + * The size of the linear page mapping may restrict the amount of + * usable RAM. + */ + if (IS_ENABLED(CONFIG_64BIT)) { + max_mapped_addr = __pa(PAGE_OFFSET) + KERN_VIRT_SIZE; + memblock_cap_memory_range(phys_ram_base, + max_mapped_addr - phys_ram_base); + } + + /* * Reserve physical address space that would be mapped to virtual * addresses greater than (void *)(-PAGE_SIZE) because: * - This memory would overlap with ERR_PTR @@ -266,6 +274,7 @@ static void __init setup_bootmem(void) memblock_reserve(max_mapped_addr, (phys_addr_t)-max_mapped_addr); } + phys_ram_end = memblock_end_of_DRAM(); min_low_pfn = PFN_UP(phys_ram_base); max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end); high_memory = (void *)(__va(PFN_PHYS(max_low_pfn))); @@ -908,7 +917,7 @@ static void __init relocate_kernel(void) static void __init create_kernel_page_table(pgd_t *pgdir, __always_unused bool early) { - uintptr_t va, end_va; + uintptr_t va, start_va, end_va; /* Map the flash resident part */ end_va = kernel_map.virt_addr + kernel_map.xiprom_sz; @@ -918,10 +927,11 @@ static void __init create_kernel_page_table(pgd_t *pgdir, PMD_SIZE, PAGE_KERNEL_EXEC); /* Map the data in RAM */ - end_va = kernel_map.virt_addr + XIP_OFFSET + kernel_map.size; - for (va = kernel_map.virt_addr + XIP_OFFSET; va < end_va; va += PMD_SIZE) + start_va = kernel_map.virt_addr + (uintptr_t)&_sdata - (uintptr_t)&_start; + end_va = kernel_map.virt_addr + kernel_map.size; + for (va = start_va; va < end_va; va += PMD_SIZE) create_pgd_mapping(pgdir, va, - kernel_map.phys_addr + (va - (kernel_map.virt_addr + XIP_OFFSET)), + kernel_map.phys_addr + (va - start_va), PMD_SIZE, PAGE_KERNEL); } #else @@ -1090,13 +1100,16 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) phys_ram_base = CONFIG_PHYS_RAM_BASE; kernel_map.phys_addr = (uintptr_t)CONFIG_PHYS_RAM_BASE; - kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_sdata); + kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_start); - kernel_map.va_kernel_xip_pa_offset = kernel_map.virt_addr - kernel_map.xiprom; + kernel_map.va_kernel_xip_text_pa_offset = kernel_map.virt_addr - kernel_map.xiprom; + kernel_map.va_kernel_xip_data_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr + + (uintptr_t)&_sdata - (uintptr_t)&_start; #else kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL); kernel_map.phys_addr = (uintptr_t)(&_start); kernel_map.size = (uintptr_t)(&_end) - kernel_map.phys_addr; + kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr; #endif #if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL) @@ -1118,7 +1131,6 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) */ kernel_map.va_pa_offset = IS_ENABLED(CONFIG_64BIT) ? 0UL : PAGE_OFFSET - kernel_map.phys_addr; - kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr; /* * The default maximal physical memory size is KERN_VIRT_SIZE for 32-bit @@ -1287,8 +1299,6 @@ static void __init create_linear_mapping_page_table(void) if (start <= __pa(PAGE_OFFSET) && __pa(PAGE_OFFSET) < end) start = __pa(PAGE_OFFSET); - if (end >= __pa(PAGE_OFFSET) + memory_limit) - end = __pa(PAGE_OFFSET) + memory_limit; create_linear_mapping_range(start, end, 0, NULL); } diff --git a/arch/riscv/purgatory/entry.S b/arch/riscv/purgatory/entry.S index 5bcf3af903da..0e6ca6d5ae4b 100644 --- a/arch/riscv/purgatory/entry.S +++ b/arch/riscv/purgatory/entry.S @@ -7,6 +7,7 @@ * Author: Li Zhengyu (lizhengyu3@huawei.com) * */ +#include <asm/asm.h> #include <linux/linkage.h> .text @@ -34,6 +35,7 @@ SYM_CODE_END(purgatory_start) .data +.align LGREG SYM_DATA(riscv_kernel_entry, .quad 0) .end diff --git a/arch/s390/kernel/alternative.h b/arch/s390/kernel/alternative.h deleted file mode 100644 index e69de29bb2d1..000000000000 --- a/arch/s390/kernel/alternative.h +++ /dev/null diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c index fa90bbdc5ef9..6f2e87920288 100644 --- a/arch/s390/kernel/fpu.c +++ b/arch/s390/kernel/fpu.c @@ -113,7 +113,7 @@ void load_fpu_state(struct fpu *state, int flags) int mask; if (flags & KERNEL_FPC) - fpu_lfpc(&state->fpc); + fpu_lfpc_safe(&state->fpc); if (!cpu_has_vx()) { if (flags & KERNEL_VXR_V0V7) load_fp_regs_vx(state->vxrs); diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 975c654cf5a5..e67cd409b858 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -59,14 +59,6 @@ SECTIONS } :text = 0x0700 RO_DATA(PAGE_SIZE) - .data.rel.ro : { - *(.data.rel.ro .data.rel.ro.*) - } - .got : { - __got_start = .; - *(.got) - __got_end = .; - } . = ALIGN(PAGE_SIZE); _sdata = .; /* Start of data section */ @@ -80,6 +72,15 @@ SECTIONS . = ALIGN(PAGE_SIZE); __end_ro_after_init = .; + .data.rel.ro : { + *(.data.rel.ro .data.rel.ro.*) + } + .got : { + __got_start = .; + *(.got) + __got_end = .; + } + RW_DATA(0x100, PAGE_SIZE, THREAD_SIZE) .data.rel : { *(.data.rel*) diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 98dab3e049de..0a67fcee4414 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -3,6 +3,7 @@ #include <linux/ptdump.h> #include <linux/seq_file.h> #include <linux/debugfs.h> +#include <linux/sort.h> #include <linux/mm.h> #include <linux/kfence.h> #include <linux/kasan.h> @@ -15,13 +16,15 @@ static unsigned long max_addr; struct addr_marker { + int is_start; unsigned long start_address; const char *name; }; enum address_markers_idx { - IDENTITY_BEFORE_NR = 0, - IDENTITY_BEFORE_END_NR, + KVA_NR = 0, + LOWCORE_START_NR, + LOWCORE_END_NR, AMODE31_START_NR, AMODE31_END_NR, KERNEL_START_NR, @@ -30,8 +33,8 @@ enum address_markers_idx { KFENCE_START_NR, KFENCE_END_NR, #endif - IDENTITY_AFTER_NR, - IDENTITY_AFTER_END_NR, + IDENTITY_START_NR, + IDENTITY_END_NR, VMEMMAP_NR, VMEMMAP_END_NR, VMALLOC_NR, @@ -59,43 +62,44 @@ enum address_markers_idx { }; static struct addr_marker address_markers[] = { - [IDENTITY_BEFORE_NR] = {0, "Identity Mapping Start"}, - [IDENTITY_BEFORE_END_NR] = {(unsigned long)_stext, "Identity Mapping End"}, - [AMODE31_START_NR] = {0, "Amode31 Area Start"}, - [AMODE31_END_NR] = {0, "Amode31 Area End"}, - [KERNEL_START_NR] = {(unsigned long)_stext, "Kernel Image Start"}, - [KERNEL_END_NR] = {(unsigned long)_end, "Kernel Image End"}, + [KVA_NR] = {0, 0, "Kernel Virtual Address Space"}, + [LOWCORE_START_NR] = {1, 0, "Lowcore Start"}, + [LOWCORE_END_NR] = {0, 0, "Lowcore End"}, + [IDENTITY_START_NR] = {1, 0, "Identity Mapping Start"}, + [IDENTITY_END_NR] = {0, 0, "Identity Mapping End"}, + [AMODE31_START_NR] = {1, 0, "Amode31 Area Start"}, + [AMODE31_END_NR] = {0, 0, "Amode31 Area End"}, + [KERNEL_START_NR] = {1, (unsigned long)_stext, "Kernel Image Start"}, + [KERNEL_END_NR] = {0, (unsigned long)_end, "Kernel Image End"}, #ifdef CONFIG_KFENCE - [KFENCE_START_NR] = {0, "KFence Pool Start"}, - [KFENCE_END_NR] = {0, "KFence Pool End"}, + [KFENCE_START_NR] = {1, 0, "KFence Pool Start"}, + [KFENCE_END_NR] = {0, 0, "KFence Pool End"}, #endif - [IDENTITY_AFTER_NR] = {(unsigned long)_end, "Identity Mapping Start"}, - [IDENTITY_AFTER_END_NR] = {0, "Identity Mapping End"}, - [VMEMMAP_NR] = {0, "vmemmap Area Start"}, - [VMEMMAP_END_NR] = {0, "vmemmap Area End"}, - [VMALLOC_NR] = {0, "vmalloc Area Start"}, - [VMALLOC_END_NR] = {0, "vmalloc Area End"}, + [VMEMMAP_NR] = {1, 0, "vmemmap Area Start"}, + [VMEMMAP_END_NR] = {0, 0, "vmemmap Area End"}, + [VMALLOC_NR] = {1, 0, "vmalloc Area Start"}, + [VMALLOC_END_NR] = {0, 0, "vmalloc Area End"}, #ifdef CONFIG_KMSAN - [KMSAN_VMALLOC_SHADOW_START_NR] = {0, "Kmsan vmalloc Shadow Start"}, - [KMSAN_VMALLOC_SHADOW_END_NR] = {0, "Kmsan vmalloc Shadow End"}, - [KMSAN_VMALLOC_ORIGIN_START_NR] = {0, "Kmsan vmalloc Origins Start"}, - [KMSAN_VMALLOC_ORIGIN_END_NR] = {0, "Kmsan vmalloc Origins End"}, - [KMSAN_MODULES_SHADOW_START_NR] = {0, "Kmsan Modules Shadow Start"}, - [KMSAN_MODULES_SHADOW_END_NR] = {0, "Kmsan Modules Shadow End"}, - [KMSAN_MODULES_ORIGIN_START_NR] = {0, "Kmsan Modules Origins Start"}, - [KMSAN_MODULES_ORIGIN_END_NR] = {0, "Kmsan Modules Origins End"}, + [KMSAN_VMALLOC_SHADOW_START_NR] = {1, 0, "Kmsan vmalloc Shadow Start"}, + [KMSAN_VMALLOC_SHADOW_END_NR] = {0, 0, "Kmsan vmalloc Shadow End"}, + [KMSAN_VMALLOC_ORIGIN_START_NR] = {1, 0, "Kmsan vmalloc Origins Start"}, + [KMSAN_VMALLOC_ORIGIN_END_NR] = {0, 0, "Kmsan vmalloc Origins End"}, + [KMSAN_MODULES_SHADOW_START_NR] = {1, 0, "Kmsan Modules Shadow Start"}, + [KMSAN_MODULES_SHADOW_END_NR] = {0, 0, "Kmsan Modules Shadow End"}, + [KMSAN_MODULES_ORIGIN_START_NR] = {1, 0, "Kmsan Modules Origins Start"}, + [KMSAN_MODULES_ORIGIN_END_NR] = {0, 0, "Kmsan Modules Origins End"}, #endif - [MODULES_NR] = {0, "Modules Area Start"}, - [MODULES_END_NR] = {0, "Modules Area End"}, - [ABS_LOWCORE_NR] = {0, "Lowcore Area Start"}, - [ABS_LOWCORE_END_NR] = {0, "Lowcore Area End"}, - [MEMCPY_REAL_NR] = {0, "Real Memory Copy Area Start"}, - [MEMCPY_REAL_END_NR] = {0, "Real Memory Copy Area End"}, + [MODULES_NR] = {1, 0, "Modules Area Start"}, + [MODULES_END_NR] = {0, 0, "Modules Area End"}, + [ABS_LOWCORE_NR] = {1, 0, "Lowcore Area Start"}, + [ABS_LOWCORE_END_NR] = {0, 0, "Lowcore Area End"}, + [MEMCPY_REAL_NR] = {1, 0, "Real Memory Copy Area Start"}, + [MEMCPY_REAL_END_NR] = {0, 0, "Real Memory Copy Area End"}, #ifdef CONFIG_KASAN - [KASAN_SHADOW_START_NR] = {KASAN_SHADOW_START, "Kasan Shadow Start"}, - [KASAN_SHADOW_END_NR] = {KASAN_SHADOW_END, "Kasan Shadow End"}, + [KASAN_SHADOW_START_NR] = {1, KASAN_SHADOW_START, "Kasan Shadow Start"}, + [KASAN_SHADOW_END_NR] = {0, KASAN_SHADOW_END, "Kasan Shadow End"}, #endif - { -1, NULL } + {1, -1UL, NULL} }; struct pg_state { @@ -163,6 +167,19 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr) st->wx_pages += (addr - st->start_address) / PAGE_SIZE; } +static void note_page_update_state(struct pg_state *st, unsigned long addr, unsigned int prot, int level) +{ + struct seq_file *m = st->seq; + + while (addr >= st->marker[1].start_address) { + st->marker++; + pt_dump_seq_printf(m, "---[ %s ]---\n", st->marker->name); + } + st->start_address = addr; + st->current_prot = prot; + st->level = level; +} + static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val) { int width = sizeof(unsigned long) * 2; @@ -186,9 +203,7 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, addr = max_addr; if (st->level == -1) { pt_dump_seq_printf(m, "---[ %s ]---\n", st->marker->name); - st->start_address = addr; - st->current_prot = prot; - st->level = level; + note_page_update_state(st, addr, prot, level); } else if (prot != st->current_prot || level != st->level || addr >= st->marker[1].start_address) { note_prot_wx(st, addr); @@ -202,13 +217,7 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, } pt_dump_seq_printf(m, "%9lu%c ", delta, *unit); print_prot(m, st->current_prot, st->level); - while (addr >= st->marker[1].start_address) { - st->marker++; - pt_dump_seq_printf(m, "---[ %s ]---\n", st->marker->name); - } - st->start_address = addr; - st->current_prot = prot; - st->level = level; + note_page_update_state(st, addr, prot, level); } } @@ -280,22 +289,25 @@ static int ptdump_show(struct seq_file *m, void *v) DEFINE_SHOW_ATTRIBUTE(ptdump); #endif /* CONFIG_PTDUMP_DEBUGFS */ -/* - * Heapsort from lib/sort.c is not a stable sorting algorithm, do a simple - * insertion sort to preserve the original order of markers with the same - * start address. - */ -static void sort_address_markers(void) +static int ptdump_cmp(const void *a, const void *b) { - struct addr_marker tmp; - int i, j; + const struct addr_marker *ama = a; + const struct addr_marker *amb = b; - for (i = 1; i < ARRAY_SIZE(address_markers) - 1; i++) { - tmp = address_markers[i]; - for (j = i - 1; j >= 0 && address_markers[j].start_address > tmp.start_address; j--) - address_markers[j + 1] = address_markers[j]; - address_markers[j + 1] = tmp; - } + if (ama->start_address > amb->start_address) + return 1; + if (ama->start_address < amb->start_address) + return -1; + /* + * If the start addresses of two markers are identical consider the + * marker which defines the start of an area higher than the one which + * defines the end of an area. This keeps pairs of markers sorted. + */ + if (ama->is_start) + return 1; + if (amb->is_start) + return -1; + return 0; } static int pt_dump_init(void) @@ -303,6 +315,8 @@ static int pt_dump_init(void) #ifdef CONFIG_KFENCE unsigned long kfence_start = (unsigned long)__kfence_pool; #endif + unsigned long lowcore = (unsigned long)get_lowcore(); + /* * Figure out the maximum virtual address being accessible with the * kernel ASCE. We need this to keep the page table walker functions @@ -310,7 +324,10 @@ static int pt_dump_init(void) */ max_addr = (get_lowcore()->kernel_asce.val & _REGION_ENTRY_TYPE_MASK) >> 2; max_addr = 1UL << (max_addr * 11 + 31); - address_markers[IDENTITY_AFTER_END_NR].start_address = ident_map_size; + address_markers[LOWCORE_START_NR].start_address = lowcore; + address_markers[LOWCORE_END_NR].start_address = lowcore + sizeof(struct lowcore); + address_markers[IDENTITY_START_NR].start_address = __identity_base; + address_markers[IDENTITY_END_NR].start_address = __identity_base + ident_map_size; address_markers[AMODE31_START_NR].start_address = (unsigned long)__samode31; address_markers[AMODE31_END_NR].start_address = (unsigned long)__eamode31; address_markers[MODULES_NR].start_address = MODULES_VADDR; @@ -337,7 +354,8 @@ static int pt_dump_init(void) address_markers[KMSAN_MODULES_ORIGIN_START_NR].start_address = KMSAN_MODULES_ORIGIN_START; address_markers[KMSAN_MODULES_ORIGIN_END_NR].start_address = KMSAN_MODULES_ORIGIN_END; #endif - sort_address_markers(); + sort(address_markers, ARRAY_SIZE(address_markers) - 1, + sizeof(address_markers[0]), ptdump_cmp, NULL); #ifdef CONFIG_PTDUMP_DEBUGFS debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops); #endif /* CONFIG_PTDUMP_DEBUGFS */ diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index ddcd39ef4346..e3d258f9e726 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -108,6 +108,8 @@ void mark_rodata_ro(void) { unsigned long size = __end_ro_after_init - __start_ro_after_init; + if (MACHINE_HAS_NX) + system_ctl_set_bit(0, CR0_INSTRUCTION_EXEC_PROTECTION_BIT); __set_memory_ro(__start_ro_after_init, __end_ro_after_init); pr_info("Write protected read-only-after-init data: %luk\n", size >> 10); } @@ -170,13 +172,6 @@ void __init mem_init(void) setup_zero_pages(); /* Setup zeroed pages. */ } -void free_initmem(void) -{ - set_memory_rwnx((unsigned long)_sinittext, - (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT); - free_initmem_default(POISON_FREE_INITMEM); -} - unsigned long memory_block_size_bytes(void) { /* diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 41c714e21292..665b8228afeb 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -661,7 +661,6 @@ void __init vmem_map_init(void) { __set_memory_rox(_stext, _etext); __set_memory_ro(_etext, __end_rodata); - __set_memory_rox(_sinittext, _einittext); __set_memory_rox(__stext_amode31, __etext_amode31); /* * If the BEAR-enhancement facility is not installed the first @@ -670,16 +669,8 @@ void __init vmem_map_init(void) */ if (!static_key_enabled(&cpu_has_bear)) set_memory_x(0, 1); - if (debug_pagealloc_enabled()) { - /* - * Use RELOC_HIDE() as long as __va(0) translates to NULL, - * since performing pointer arithmetic on a NULL pointer - * has undefined behavior and generates compiler warnings. - */ - __set_memory_4k(__va(0), RELOC_HIDE(__va(0), ident_map_size)); - } - if (MACHINE_HAS_NX) - system_ctl_set_bit(0, CR0_INSTRUCTION_EXEC_PROTECTION_BIT); + if (debug_pagealloc_enabled()) + __set_memory_4k(__va(0), __va(0) + ident_map_size); pr_info("Write protected kernel read-only data: %luk\n", (unsigned long)(__end_rodata - _stext) >> 10); } diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index 082d61d85dfc..de1df0cb45da 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -163,7 +163,7 @@ struct sev_config { */ use_cas : 1, - __reserved : 62; + __reserved : 61; }; static struct sev_config sev_cfg __read_mostly; diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 83073fa3c989..7093ee21c0d1 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -344,6 +344,7 @@ 332 common statx sys_statx 333 common io_pgetevents sys_io_pgetevents 334 common rseq sys_rseq +335 common uretprobe sys_uretprobe # don't use numbers 387 through 423, add new calls after the last # 'common' entry 424 common pidfd_send_signal sys_pidfd_send_signal @@ -385,7 +386,6 @@ 460 common lsm_set_self_attr sys_lsm_set_self_attr 461 common lsm_list_modules sys_lsm_list_modules 462 common mseal sys_mseal -467 common uretprobe sys_uretprobe # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 12f2a0c14d33..be01823b1bb4 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1520,20 +1520,23 @@ static void x86_pmu_start(struct perf_event *event, int flags) void perf_event_print_debug(void) { u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; + unsigned long *cntr_mask, *fixed_cntr_mask; + struct event_constraint *pebs_constraints; + struct cpu_hw_events *cpuc; u64 pebs, debugctl; - int cpu = smp_processor_id(); - struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); - unsigned long *cntr_mask = hybrid(cpuc->pmu, cntr_mask); - unsigned long *fixed_cntr_mask = hybrid(cpuc->pmu, fixed_cntr_mask); - struct event_constraint *pebs_constraints = hybrid(cpuc->pmu, pebs_constraints); - unsigned long flags; - int idx; + int cpu, idx; + + guard(irqsave)(); + + cpu = smp_processor_id(); + cpuc = &per_cpu(cpu_hw_events, cpu); + cntr_mask = hybrid(cpuc->pmu, cntr_mask); + fixed_cntr_mask = hybrid(cpuc->pmu, fixed_cntr_mask); + pebs_constraints = hybrid(cpuc->pmu, pebs_constraints); if (!*(u64 *)cntr_mask) return; - local_irq_save(flags); - if (x86_pmu.version >= 2) { rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); @@ -1577,7 +1580,6 @@ void perf_event_print_debug(void) pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", cpu, idx, pmc_count); } - local_irq_restore(flags); } void x86_pmu_stop(struct perf_event *event, int flags) diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index be58cfb012dd..9f116dfc4728 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -64,7 +64,7 @@ * perf code: 0x00 * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL, * KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL, - * RPL,SPR,MTL,ARL,LNL + * RPL,SPR,MTL,ARL,LNL,SRF * Scope: Package (physical package) * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. * perf code: 0x01 @@ -693,7 +693,8 @@ static const struct cstate_model srf_cstates __initconst = { .core_events = BIT(PERF_CSTATE_CORE_C1_RES) | BIT(PERF_CSTATE_CORE_C6_RES), - .pkg_events = BIT(PERF_CSTATE_PKG_C6_RES), + .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | + BIT(PERF_CSTATE_PKG_C6_RES), .module_events = BIT(PERF_CSTATE_MODULE_C6_RES), }; diff --git a/arch/x86/include/asm/cmdline.h b/arch/x86/include/asm/cmdline.h index 6faaf27e8899..6cbd9ae58b21 100644 --- a/arch/x86/include/asm/cmdline.h +++ b/arch/x86/include/asm/cmdline.h @@ -2,6 +2,10 @@ #ifndef _ASM_X86_CMDLINE_H #define _ASM_X86_CMDLINE_H +#include <asm/setup.h> + +extern char builtin_cmdline[COMMAND_LINE_SIZE]; + int cmdline_find_option_bool(const char *cmdline_ptr, const char *option); int cmdline_find_option(const char *cmdline_ptr, const char *option, char *buffer, int bufsize); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 950a03e0181e..94e7b5a4fafe 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1305,6 +1305,7 @@ struct kvm_arch { u8 vm_type; bool has_private_mem; bool has_protected_state; + bool pre_fault_allowed; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; struct list_head active_mmu_pages; struct list_head zapped_obsolete_pages; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index be5889bded49..1e0fe5f8ab84 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -462,7 +462,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) switch (c->x86_model) { case 0x00 ... 0x2f: case 0x40 ... 0x4f: - case 0x70 ... 0x7f: + case 0x60 ... 0x7f: setup_force_cpu_cap(X86_FEATURE_ZEN5); break; default: diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c index b3fa61d45352..0b69bfbf345d 100644 --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -306,7 +306,7 @@ static void freq_invariance_enable(void) WARN_ON_ONCE(1); return; } - static_branch_enable(&arch_scale_freq_key); + static_branch_enable_cpuslocked(&arch_scale_freq_key); register_freq_invariance_syscore_ops(); pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio); } @@ -323,8 +323,10 @@ static void __init bp_init_freq_invariance(void) if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return; - if (intel_set_max_freq_ratio()) + if (intel_set_max_freq_ratio()) { + guard(cpus_read_lock)(); freq_invariance_enable(); + } } static void disable_freq_invariance_workfn(struct work_struct *work) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 5d34cad9b7b1..6129dc2ba784 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -164,7 +164,7 @@ unsigned long saved_video_mode; static char __initdata command_line[COMMAND_LINE_SIZE]; #ifdef CONFIG_CMDLINE_BOOL -static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; +char builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; bool builtin_cmdline_added __ro_after_init; #endif diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 4287a8071a3a..472a1537b7a9 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -141,8 +141,8 @@ config KVM_AMD_SEV depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m) select ARCH_HAS_CC_PLATFORM select KVM_GENERIC_PRIVATE_MEM - select HAVE_KVM_GMEM_PREPARE - select HAVE_KVM_GMEM_INVALIDATE + select HAVE_KVM_ARCH_GMEM_PREPARE + select HAVE_KVM_ARCH_GMEM_INVALIDATE help Provides support for launching Encrypted VMs (SEV) and Encrypted VMs with Encrypted State (SEV-ES) on AMD processors. diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index a7172ba59ad2..4915acdbfcd8 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1743,7 +1743,7 @@ static void limit_periodic_timer_frequency(struct kvm_lapic *apic) s64 min_period = min_timer_period_us * 1000LL; if (apic->lapic_timer.period < min_period) { - pr_info_ratelimited( + pr_info_once( "vcpu %i: requested %lld ns " "lapic timer period limited to %lld ns\n", apic->vcpu->vcpu_id, diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 901be9e420a4..928cf84778b0 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4335,7 +4335,7 @@ static u8 kvm_max_private_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, if (req_max_level) max_level = min(max_level, req_max_level); - return req_max_level; + return max_level; } static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu, @@ -4743,6 +4743,9 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, u64 end; int r; + if (!vcpu->kvm->arch.pre_fault_allowed) + return -EOPNOTSUPP; + /* * reload is efficient when called repeatedly, so we can do it on * every iteration. @@ -7510,7 +7513,7 @@ static bool hugepage_has_attrs(struct kvm *kvm, struct kvm_memory_slot *slot, const unsigned long end = start + KVM_PAGES_PER_HPAGE(level); if (level == PG_LEVEL_2M) - return kvm_range_has_memory_attributes(kvm, start, end, attrs); + return kvm_range_has_memory_attributes(kvm, start, end, ~0, attrs); for (gfn = start; gfn < end; gfn += KVM_PAGES_PER_HPAGE(level - 1)) { if (hugepage_test_mixed(slot, gfn, level - 1) || diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index a16c873b3232..532df12b43c5 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2279,18 +2279,11 @@ static int sev_gmem_post_populate(struct kvm *kvm, gfn_t gfn_start, kvm_pfn_t pf bool assigned; int level; - if (!kvm_mem_is_private(kvm, gfn)) { - pr_debug("%s: Failed to ensure GFN 0x%llx has private memory attribute set\n", - __func__, gfn); - ret = -EINVAL; - goto err; - } - ret = snp_lookup_rmpentry((u64)pfn + i, &assigned, &level); if (ret || assigned) { pr_debug("%s: Failed to ensure GFN 0x%llx RMP entry is initial shared state, ret: %d assigned: %d\n", __func__, gfn, ret, assigned); - ret = -EINVAL; + ret = ret ? -EINVAL : -EEXIST; goto err; } @@ -2549,6 +2542,14 @@ static int snp_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) data->gctx_paddr = __psp_pa(sev->snp_context); ret = sev_issue_cmd(kvm, SEV_CMD_SNP_LAUNCH_FINISH, data, &argp->error); + /* + * Now that there will be no more SNP_LAUNCH_UPDATE ioctls, private pages + * can be given to the guest simply by marking the RMP entry as private. + * This can happen on first access and also with KVM_PRE_FAULT_MEMORY. + */ + if (!ret) + kvm->arch.pre_fault_allowed = true; + kfree(id_auth); e_free_id_block: diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index c115d26844f7..d6f252555ab3 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4949,6 +4949,7 @@ static int svm_vm_init(struct kvm *kvm) to_kvm_sev_info(kvm)->need_init = true; kvm->arch.has_private_mem = (type == KVM_X86_SNP_VM); + kvm->arch.pre_fault_allowed = !kvm->arch.has_private_mem; } if (!pause_filter_count || !pause_filter_thresh) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index af6c8cf6a37a..ef3d3511e4af 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12646,6 +12646,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.vm_type = type; kvm->arch.has_private_mem = (type == KVM_X86_SW_PROTECTED_VM); + /* Decided by the vendor code for other VM types. */ + kvm->arch.pre_fault_allowed = + type == KVM_X86_DEFAULT_VM || type == KVM_X86_SW_PROTECTED_VM; ret = kvm_page_track_init(kvm); if (ret) @@ -13641,19 +13644,14 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_arch_no_poll); -#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE -bool kvm_arch_gmem_prepare_needed(struct kvm *kvm) -{ - return kvm->arch.vm_type == KVM_X86_SNP_VM; -} - +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order) { return kvm_x86_call(gmem_prepare)(kvm, pfn, gfn, max_order); } #endif -#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end) { kvm_x86_call(gmem_invalidate)(start, end); diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c index 384da1fdd5c6..c65cd5550454 100644 --- a/arch/x86/lib/cmdline.c +++ b/arch/x86/lib/cmdline.c @@ -207,18 +207,29 @@ __cmdline_find_option(const char *cmdline, int max_cmdline_size, int cmdline_find_option_bool(const char *cmdline, const char *option) { - if (IS_ENABLED(CONFIG_CMDLINE_BOOL)) - WARN_ON_ONCE(!builtin_cmdline_added); + int ret; - return __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option); + ret = __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option); + if (ret > 0) + return ret; + + if (IS_ENABLED(CONFIG_CMDLINE_BOOL) && !builtin_cmdline_added) + return __cmdline_find_option_bool(builtin_cmdline, COMMAND_LINE_SIZE, option); + + return ret; } int cmdline_find_option(const char *cmdline, const char *option, char *buffer, int bufsize) { - if (IS_ENABLED(CONFIG_CMDLINE_BOOL)) - WARN_ON_ONCE(!builtin_cmdline_added); + int ret; + + ret = __cmdline_find_option(cmdline, COMMAND_LINE_SIZE, option, buffer, bufsize); + if (ret > 0) + return ret; + + if (IS_ENABLED(CONFIG_CMDLINE_BOOL) && !builtin_cmdline_added) + return __cmdline_find_option(builtin_cmdline, COMMAND_LINE_SIZE, option, buffer, bufsize); - return __cmdline_find_option(cmdline, COMMAND_LINE_SIZE, option, - buffer, bufsize); + return ret; } diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index a314622aa093..d066aecf8aeb 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -88,12 +88,14 @@ SYM_FUNC_END(__get_user_4) EXPORT_SYMBOL(__get_user_4) SYM_FUNC_START(__get_user_8) +#ifndef CONFIG_X86_64 + xor %ecx,%ecx +#endif check_range size=8 ASM_STAC #ifdef CONFIG_X86_64 UACCESS movq (%_ASM_AX),%rdx #else - xor %ecx,%ecx UACCESS movl (%_ASM_AX),%edx UACCESS movl 4(%_ASM_AX),%ecx #endif diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 2e69abf4f852..bfdf5f45b137 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -374,14 +374,14 @@ pti_clone_pgtable(unsigned long start, unsigned long end, */ *target_pmd = *pmd; - addr += PMD_SIZE; + addr = round_up(addr + 1, PMD_SIZE); } else if (level == PTI_CLONE_PTE) { /* Walk the page-table down to the pte level */ pte = pte_offset_kernel(pmd, addr); if (pte_none(*pte)) { - addr += PAGE_SIZE; + addr = round_up(addr + 1, PAGE_SIZE); continue; } @@ -401,7 +401,7 @@ pti_clone_pgtable(unsigned long start, unsigned long end, /* Clone the PTE */ *target_pte = *pte; - addr += PAGE_SIZE; + addr = round_up(addr + 1, PAGE_SIZE); } else { BUG(); @@ -496,7 +496,7 @@ static void pti_clone_entry_text(void) { pti_clone_pgtable((unsigned long) __entry_text_start, (unsigned long) __entry_text_end, - PTI_CLONE_PMD); + PTI_LEVEL_KERNEL_IMAGE); } /* diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig index 90a94a111e67..769fa288179d 100644 --- a/drivers/bluetooth/Kconfig +++ b/drivers/bluetooth/Kconfig @@ -413,6 +413,7 @@ config BT_ATH3K config BT_MTKSDIO tristate "MediaTek HCI SDIO driver" depends on MMC + depends on USB || !BT_HCIBTUSB_MTK select BT_MTK help MediaTek Bluetooth HCI SDIO driver. @@ -425,6 +426,7 @@ config BT_MTKSDIO config BT_MTKUART tristate "MediaTek HCI UART driver" depends on SERIAL_DEV_BUS + depends on USB || !BT_HCIBTUSB_MTK select BT_MTK help MediaTek Bluetooth HCI UART driver. diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index e7a612504ab1..2ebc970e6573 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -3085,6 +3085,9 @@ static int btintel_setup_combined(struct hci_dev *hdev) btintel_set_dsm_reset_method(hdev, &ver_tlv); err = btintel_bootloader_setup_tlv(hdev, &ver_tlv); + if (err) + goto exit_error; + btintel_register_devcoredump_support(hdev); btintel_print_fseq_info(hdev); break; diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c index b7c348687a77..2b7c80043aa2 100644 --- a/drivers/bluetooth/btmtk.c +++ b/drivers/bluetooth/btmtk.c @@ -437,6 +437,7 @@ int btmtk_process_coredump(struct hci_dev *hdev, struct sk_buff *skb) } EXPORT_SYMBOL_GPL(btmtk_process_coredump); +#if IS_ENABLED(CONFIG_BT_HCIBTUSB_MTK) static void btmtk_usb_wmt_recv(struct urb *urb) { struct hci_dev *hdev = urb->context; @@ -1262,7 +1263,8 @@ int btmtk_usb_suspend(struct hci_dev *hdev) struct btmtk_data *btmtk_data = hci_get_priv(hdev); /* Stop urb anchor for iso data transmission */ - usb_kill_anchored_urbs(&btmtk_data->isopkt_anchor); + if (test_bit(BTMTK_ISOPKT_RUNNING, &btmtk_data->flags)) + usb_kill_anchored_urbs(&btmtk_data->isopkt_anchor); return 0; } @@ -1487,6 +1489,7 @@ int btmtk_usb_shutdown(struct hci_dev *hdev) return 0; } EXPORT_SYMBOL_GPL(btmtk_usb_shutdown); +#endif MODULE_AUTHOR("Sean Wang <sean.wang@mediatek.com>"); MODULE_AUTHOR("Mark Chen <mark-yw.chen@mediatek.com>"); diff --git a/drivers/cache/Kconfig b/drivers/cache/Kconfig index 94abd8f632a7..db51386c663a 100644 --- a/drivers/cache/Kconfig +++ b/drivers/cache/Kconfig @@ -18,6 +18,7 @@ config STARFIVE_STARLINK_CACHE bool "StarFive StarLink Cache controller" depends on RISCV depends on ARCH_STARFIVE + depends on 64BIT select RISCV_DMA_NONCOHERENT select RISCV_NONSTANDARD_CACHE_OPS help diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index f23ba62ce127..ed4e8ddbe76a 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -27,7 +27,8 @@ cflags-$(CONFIG_ARM64) += -fpie $(DISABLE_STACKLEAK_PLUGIN) \ cflags-$(CONFIG_ARM) += -DEFI_HAVE_STRLEN -DEFI_HAVE_STRNLEN \ -DEFI_HAVE_MEMCHR -DEFI_HAVE_STRRCHR \ -DEFI_HAVE_STRCMP -fno-builtin -fpic \ - $(call cc-option,-mno-single-pic-base) + $(call cc-option,-mno-single-pic-base) \ + $(DISABLE_STACKLEAK_PLUGIN) cflags-$(CONFIG_RISCV) += -fpic -DNO_ALTERNATIVE -mno-relax \ $(DISABLE_STACKLEAK_PLUGIN) cflags-$(CONFIG_LOONGARCH) += -fpie @@ -57,6 +58,10 @@ KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_CFI), $(KBUILD_CFLAGS)) # disable LTO KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO), $(KBUILD_CFLAGS)) +# The .data section would be renamed to .data.efistub, therefore, remove +# `-fdata-sections` flag from KBUILD_CFLAGS_KERNEL +KBUILD_CFLAGS_KERNEL := $(filter-out -fdata-sections, $(KBUILD_CFLAGS_KERNEL)) + lib-y := efi-stub-helper.o gop.o secureboot.o tpm.o \ file.o mem.o random.o randomalloc.o pci.o \ skip_spaces.o lib-cmdline.o lib-ctype.o \ diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index fd0749c0c630..6b2c6b91f962 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -268,6 +268,7 @@ config DRM_EXEC config DRM_GPUVM tristate depends on DRM + select DRM_EXEC help GPU-VM representation providing helpers to manage a GPUs virtual address space diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 916b6b8cf7d9..9aa952f258cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1778,7 +1778,7 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, struct ttm_operation_ctx ctx = { false, false }; struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_bo_va_mapping *mapping; - int r; + int i, r; addr /= AMDGPU_GPU_PAGE_SIZE; @@ -1793,13 +1793,13 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket) return -EINVAL; - if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) { - (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains); - r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx); - if (r) - return r; - } + (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; + amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains); + for (i = 0; i < (*bo)->placement.num_placement; i++) + (*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS; + r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx); + if (r) + return r; return amdgpu_ttm_alloc_gart(&(*bo)->tbo); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index e499d6ba306b..dac88d2dd70d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -103,7 +103,7 @@ static int amdgpu_mes_event_log_init(struct amdgpu_device *adev) if (!amdgpu_mes_log_enable) return 0; - r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_LOG_BUFFER_SIZE, PAGE_SIZE, + r = amdgpu_bo_create_kernel(adev, adev->mes.event_log_size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &adev->mes.event_log_gpu_obj, &adev->mes.event_log_gpu_addr, @@ -113,7 +113,7 @@ static int amdgpu_mes_event_log_init(struct amdgpu_device *adev) return r; } - memset(adev->mes.event_log_cpu_addr, 0, PAGE_SIZE); + memset(adev->mes.event_log_cpu_addr, 0, adev->mes.event_log_size); return 0; @@ -1573,7 +1573,7 @@ static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused) uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr); seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4, - mem, AMDGPU_MES_LOG_BUFFER_SIZE, false); + mem, adev->mes.event_log_size, false); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index e11051271f71..2d659c612f03 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -52,7 +52,6 @@ enum amdgpu_mes_priority_level { #define AMDGPU_MES_PROC_CTX_SIZE 0x1000 /* one page area */ #define AMDGPU_MES_GANG_CTX_SIZE 0x1000 /* one page area */ -#define AMDGPU_MES_LOG_BUFFER_SIZE 0x4000 /* Maximu log buffer size for MES */ struct amdgpu_mes_funcs; @@ -135,8 +134,9 @@ struct amdgpu_mes { unsigned long *doorbell_bitmap; /* MES event log buffer */ - struct amdgpu_bo *event_log_gpu_obj; - uint64_t event_log_gpu_addr; + uint32_t event_log_size; + struct amdgpu_bo *event_log_gpu_obj; + uint64_t event_log_gpu_addr; void *event_log_cpu_addr; /* ip specific functions */ diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 8ce51b9236c1..f9343642ae7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -1163,6 +1163,8 @@ static int mes_v11_0_sw_init(void *handle) adev->mes.kiq_hw_init = &mes_v11_0_kiq_hw_init; adev->mes.kiq_hw_fini = &mes_v11_0_kiq_hw_fini; + adev->mes.event_log_size = AMDGPU_MES_LOG_BUFFER_SIZE; + r = amdgpu_mes_init(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index c9f74231ad59..0713bc3eb263 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -551,8 +551,10 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.oversubscription_timer = 50; mes_set_hw_res_pkt.unmapped_doorbell_handling = 1; - mes_set_hw_res_pkt.enable_mes_event_int_logging = 0; - mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr; + if (amdgpu_mes_log_enable) { + mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; + mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr; + } return mes_v12_0_submit_pkt_and_poll_completion(mes, &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), @@ -1237,6 +1239,8 @@ static int mes_v12_0_sw_init(void *handle) adev->mes.kiq_hw_init = &mes_v12_0_kiq_hw_init; adev->mes.kiq_hw_fini = &mes_v12_0_kiq_hw_fini; + adev->mes.event_log_size = AMDGPU_MES_LOG_BUFFER_SIZE; + r = amdgpu_mes_init(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h b/drivers/gpu/drm/amd/include/mes_v11_api_def.h index b72d5d362251..21ceafce1f9b 100644 --- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h @@ -28,6 +28,9 @@ #define MES_API_VERSION 1 +/* Maximum log buffer size for MES. Needs to be updated if MES expands MES_EVT_INTR_HIST_LOG */ +#define AMDGPU_MES_LOG_BUFFER_SIZE 0x4000 + /* Driver submits one API(cmd) as a single Frame and this command size is same * for all API to ease the debugging and parsing of ring buffer. */ diff --git a/drivers/gpu/drm/amd/include/mes_v12_api_def.h b/drivers/gpu/drm/amd/include/mes_v12_api_def.h index ffd67c6ed9b3..4cf2c9f30b3d 100644 --- a/drivers/gpu/drm/amd/include/mes_v12_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v12_api_def.h @@ -28,6 +28,9 @@ #define MES_API_VERSION 0x14 +/* Maximum log buffer size for MES. Needs to be updated if MES expands MES_EVT_INTR_HIST_LOG_12 */ +#define AMDGPU_MES_LOG_BUFFER_SIZE 0xC000 + /* Driver submits one API(cmd) as a single Frame and this command size is same for all API * to ease the debugging and parsing of ring buffer. */ diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index a1b8a82d77cf..8b7d6ed7e2ed 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -618,7 +618,8 @@ int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_versio const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; int r = 0; - if (!pp_funcs || !pp_funcs->load_firmware || adev->flags & AMD_IS_APU) + if (!pp_funcs || !pp_funcs->load_firmware || + (is_support_sw_smu(adev) && (adev->flags & AMD_IS_APU))) return 0; mutex_lock(&adev->pm.mutex); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 98ea58d792ca..e1a27903c80a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -66,6 +66,7 @@ #define MP0_MP1_DATA_REGION_SIZE_COMBOPPTABLE 0x4000 #define DEBUGSMC_MSG_Mode1Reset 2 +#define LINK_SPEED_MAX 3 static struct cmn2asic_msg_mapping smu_v14_0_2_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1), @@ -221,7 +222,6 @@ static struct cmn2asic_mapping smu_v14_0_2_workload_map[PP_SMC_POWER_PROFILE_COU WORKLOAD_MAP(PP_SMC_POWER_PROFILE_WINDOW3D, WORKLOAD_PPLIB_WINDOW_3D_BIT), }; -#if 0 static const uint8_t smu_v14_0_2_throttler_map[] = { [THROTTLER_PPT0_BIT] = (SMU_THROTTLER_PPT0_BIT), [THROTTLER_PPT1_BIT] = (SMU_THROTTLER_PPT1_BIT), @@ -241,7 +241,6 @@ static const uint8_t smu_v14_0_2_throttler_map[] = { [THROTTLER_GFX_APCC_PLUS_BIT] = (SMU_THROTTLER_APCC_BIT), [THROTTLER_FIT_BIT] = (SMU_THROTTLER_FIT_BIT), }; -#endif static int smu_v14_0_2_get_allowed_feature_mask(struct smu_context *smu, @@ -1869,6 +1868,88 @@ static ssize_t smu_v14_0_2_get_ecc_info(struct smu_context *smu, return ret; } +static ssize_t smu_v14_0_2_get_gpu_metrics(struct smu_context *smu, + void **table) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct gpu_metrics_v1_3 *gpu_metrics = + (struct gpu_metrics_v1_3 *)smu_table->gpu_metrics_table; + SmuMetricsExternal_t metrics_ext; + SmuMetrics_t *metrics = &metrics_ext.SmuMetrics; + int ret = 0; + + ret = smu_cmn_get_metrics_table(smu, + &metrics_ext, + true); + if (ret) + return ret; + + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 3); + + gpu_metrics->temperature_edge = metrics->AvgTemperature[TEMP_EDGE]; + gpu_metrics->temperature_hotspot = metrics->AvgTemperature[TEMP_HOTSPOT]; + gpu_metrics->temperature_mem = metrics->AvgTemperature[TEMP_MEM]; + gpu_metrics->temperature_vrgfx = metrics->AvgTemperature[TEMP_VR_GFX]; + gpu_metrics->temperature_vrsoc = metrics->AvgTemperature[TEMP_VR_SOC]; + gpu_metrics->temperature_vrmem = max(metrics->AvgTemperature[TEMP_VR_MEM0], + metrics->AvgTemperature[TEMP_VR_MEM1]); + + gpu_metrics->average_gfx_activity = metrics->AverageGfxActivity; + gpu_metrics->average_umc_activity = metrics->AverageUclkActivity; + gpu_metrics->average_mm_activity = max(metrics->Vcn0ActivityPercentage, + metrics->Vcn1ActivityPercentage); + + gpu_metrics->average_socket_power = metrics->AverageSocketPower; + gpu_metrics->energy_accumulator = metrics->EnergyAccumulator; + + if (metrics->AverageGfxActivity <= SMU_14_0_2_BUSY_THRESHOLD) + gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPostDs; + else + gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPreDs; + + if (metrics->AverageUclkActivity <= SMU_14_0_2_BUSY_THRESHOLD) + gpu_metrics->average_uclk_frequency = metrics->AverageMemclkFrequencyPostDs; + else + gpu_metrics->average_uclk_frequency = metrics->AverageMemclkFrequencyPreDs; + + gpu_metrics->average_vclk0_frequency = metrics->AverageVclk0Frequency; + gpu_metrics->average_dclk0_frequency = metrics->AverageDclk0Frequency; + gpu_metrics->average_vclk1_frequency = metrics->AverageVclk1Frequency; + gpu_metrics->average_dclk1_frequency = metrics->AverageDclk1Frequency; + + gpu_metrics->current_gfxclk = gpu_metrics->average_gfxclk_frequency; + gpu_metrics->current_socclk = metrics->CurrClock[PPCLK_SOCCLK]; + gpu_metrics->current_uclk = metrics->CurrClock[PPCLK_UCLK]; + gpu_metrics->current_vclk0 = metrics->CurrClock[PPCLK_VCLK_0]; + gpu_metrics->current_dclk0 = metrics->CurrClock[PPCLK_DCLK_0]; + gpu_metrics->current_vclk1 = metrics->CurrClock[PPCLK_VCLK_0]; + gpu_metrics->current_dclk1 = metrics->CurrClock[PPCLK_DCLK_0]; + + gpu_metrics->throttle_status = + smu_v14_0_2_get_throttler_status(metrics); + gpu_metrics->indep_throttle_status = + smu_cmn_get_indep_throttler_status(gpu_metrics->throttle_status, + smu_v14_0_2_throttler_map); + + gpu_metrics->current_fan_speed = metrics->AvgFanRpm; + + gpu_metrics->pcie_link_width = metrics->PcieWidth; + if ((metrics->PcieRate - 1) > LINK_SPEED_MAX) + gpu_metrics->pcie_link_speed = pcie_gen_to_speed(1); + else + gpu_metrics->pcie_link_speed = pcie_gen_to_speed(metrics->PcieRate); + + gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); + + gpu_metrics->voltage_gfx = metrics->AvgVoltage[SVI_PLANE_VDD_GFX]; + gpu_metrics->voltage_soc = metrics->AvgVoltage[SVI_PLANE_VDD_SOC]; + gpu_metrics->voltage_mem = metrics->AvgVoltage[SVI_PLANE_VDDIO_MEM]; + + *table = (void *)gpu_metrics; + + return sizeof(struct gpu_metrics_v1_3); +} + static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .get_allowed_feature_mask = smu_v14_0_2_get_allowed_feature_mask, .set_default_dpm_table = smu_v14_0_2_set_default_dpm_table, @@ -1905,6 +1986,7 @@ static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .enable_thermal_alert = smu_v14_0_enable_thermal_alert, .disable_thermal_alert = smu_v14_0_disable_thermal_alert, .notify_memory_pool_location = smu_v14_0_notify_memory_pool_location, + .get_gpu_metrics = smu_v14_0_2_get_gpu_metrics, .set_soft_freq_limited_range = smu_v14_0_set_soft_freq_limited_range, .init_pptable_microcode = smu_v14_0_init_pptable_microcode, .populate_umd_state_clk = smu_v14_0_2_populate_umd_state_clk, diff --git a/drivers/gpu/drm/ast/ast_dp.c b/drivers/gpu/drm/ast/ast_dp.c index 1e9259416980..e6c7f0d64e99 100644 --- a/drivers/gpu/drm/ast/ast_dp.c +++ b/drivers/gpu/drm/ast/ast_dp.c @@ -158,7 +158,14 @@ void ast_dp_launch(struct drm_device *dev) ASTDP_HOST_EDID_READ_DONE); } +bool ast_dp_power_is_on(struct ast_device *ast) +{ + u8 vgacre3; + + vgacre3 = ast_get_index_reg(ast, AST_IO_VGACRI, 0xe3); + return !(vgacre3 & AST_DP_PHY_SLEEP); +} void ast_dp_power_on_off(struct drm_device *dev, bool on) { diff --git a/drivers/gpu/drm/ast/ast_drv.c b/drivers/gpu/drm/ast/ast_drv.c index aae019e79bda..225817087b4d 100644 --- a/drivers/gpu/drm/ast/ast_drv.c +++ b/drivers/gpu/drm/ast/ast_drv.c @@ -391,6 +391,11 @@ static int ast_drm_freeze(struct drm_device *dev) static int ast_drm_thaw(struct drm_device *dev) { + struct ast_device *ast = to_ast_device(dev); + + ast_enable_vga(ast->ioregs); + ast_open_key(ast->ioregs); + ast_enable_mmio(dev->dev, ast->ioregs); ast_post_gpu(dev); return drm_mode_config_helper_resume(dev); diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h index ba3d86973995..47bab5596c16 100644 --- a/drivers/gpu/drm/ast/ast_drv.h +++ b/drivers/gpu/drm/ast/ast_drv.h @@ -472,6 +472,7 @@ void ast_init_3rdtx(struct drm_device *dev); bool ast_astdp_is_connected(struct ast_device *ast); int ast_astdp_read_edid(struct drm_device *dev, u8 *ediddata); void ast_dp_launch(struct drm_device *dev); +bool ast_dp_power_is_on(struct ast_device *ast); void ast_dp_power_on_off(struct drm_device *dev, bool no); void ast_dp_set_on_off(struct drm_device *dev, bool no); void ast_dp_set_mode(struct drm_crtc *crtc, struct ast_vbios_mode_info *vbios_mode); diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index dc8f639e82fd..049ee1477c33 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -28,6 +28,7 @@ * Authors: Dave Airlie <airlied@redhat.com> */ +#include <linux/delay.h> #include <linux/export.h> #include <linux/pci.h> @@ -1687,11 +1688,35 @@ static int ast_astdp_connector_helper_detect_ctx(struct drm_connector *connector struct drm_modeset_acquire_ctx *ctx, bool force) { + struct drm_device *dev = connector->dev; struct ast_device *ast = to_ast_device(connector->dev); + enum drm_connector_status status = connector_status_disconnected; + struct drm_connector_state *connector_state = connector->state; + bool is_active = false; + + mutex_lock(&ast->modeset_lock); + + if (connector_state && connector_state->crtc) { + struct drm_crtc_state *crtc_state = connector_state->crtc->state; + + if (crtc_state && crtc_state->active) + is_active = true; + } + + if (!is_active && !ast_dp_power_is_on(ast)) { + ast_dp_power_on_off(dev, true); + msleep(50); + } if (ast_astdp_is_connected(ast)) - return connector_status_connected; - return connector_status_disconnected; + status = connector_status_connected; + + if (!is_active && status == connector_status_disconnected) + ast_dp_power_on_off(dev, false); + + mutex_unlock(&ast->modeset_lock); + + return status; } static const struct drm_connector_helper_funcs ast_astdp_connector_helper_funcs = { diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index 22bbb2d83e30..7609c798d73d 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -1070,7 +1070,10 @@ int drm_atomic_set_property(struct drm_atomic_state *state, break; } - if (async_flip && prop != config->prop_fb_id) { + if (async_flip && + prop != config->prop_fb_id && + prop != config->prop_in_fence_fd && + prop != config->prop_fb_damage_clips) { ret = drm_atomic_plane_get_property(plane, plane_state, prop, &old_val); ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop); diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c index 2803ac111bbd..bfedcbf516db 100644 --- a/drivers/gpu/drm/drm_client.c +++ b/drivers/gpu/drm/drm_client.c @@ -355,7 +355,7 @@ int drm_client_buffer_vmap_local(struct drm_client_buffer *buffer, err_drm_gem_vmap_unlocked: drm_gem_unlock(gem); - return 0; + return ret; } EXPORT_SYMBOL(drm_client_buffer_vmap_local); diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 18565ec68451..56ac37ea2f27 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -624,6 +624,17 @@ static void drm_fb_helper_add_damage_clip(struct drm_fb_helper *helper, u32 x, u static void drm_fb_helper_damage(struct drm_fb_helper *helper, u32 x, u32 y, u32 width, u32 height) { + /* + * This function may be invoked by panic() to flush the frame + * buffer, where all CPUs except the panic CPU are stopped. + * During the following schedule_work(), the panic CPU needs + * the worker_pool lock, which might be held by a stopped CPU, + * causing schedule_work() and panic() to block. Return early on + * oops_in_progress to prevent this blocking. + */ + if (oops_in_progress) + return; + drm_fb_helper_add_damage_clip(helper, x, y, width, height); schedule_work(&helper->damage_work); diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index 3f84d7527793..c16c7678237e 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -414,6 +414,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ONE XPLAYER"), }, .driver_data = (void *)&lcd1600x2560_leftside_up, + }, { /* OrangePi Neo */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "OrangePi"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "NEO-01"), + }, + .driver_data = (void *)&lcd1200x1920_rightside_up, }, { /* Samsung GalaxyBook 10.6 */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."), diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 90998b037349..292d163036b1 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -1658,7 +1658,7 @@ static void skl_wrpll_params_populate(struct skl_wrpll_params *params, } static int -skl_ddi_calculate_wrpll(int clock /* in Hz */, +skl_ddi_calculate_wrpll(int clock, int ref_clock, struct skl_wrpll_params *wrpll_params) { @@ -1683,7 +1683,7 @@ skl_ddi_calculate_wrpll(int clock /* in Hz */, }; unsigned int dco, d, i; unsigned int p0, p1, p2; - u64 afe_clock = clock * 5; /* AFE Clock is 5x Pixel clock */ + u64 afe_clock = (u64)clock * 1000 * 5; /* AFE Clock is 5x Pixel clock, in Hz */ for (d = 0; d < ARRAY_SIZE(dividers); d++) { for (dco = 0; dco < ARRAY_SIZE(dco_central_freq); dco++) { @@ -1808,7 +1808,7 @@ static int skl_ddi_hdmi_pll_dividers(struct intel_crtc_state *crtc_state) struct skl_wrpll_params wrpll_params = {}; int ret; - ret = skl_ddi_calculate_wrpll(crtc_state->port_clock * 1000, + ret = skl_ddi_calculate_wrpll(crtc_state->port_clock, i915->display.dpll.ref_clks.nssc, &wrpll_params); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/display/intel_hdcp_regs.h b/drivers/gpu/drm/i915/display/intel_hdcp_regs.h index a568a457e532..f590d7f48ba7 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp_regs.h +++ b/drivers/gpu/drm/i915/display/intel_hdcp_regs.h @@ -251,7 +251,7 @@ #define HDCP2_STREAM_STATUS(dev_priv, trans, port) \ (TRANS_HDCP(dev_priv) ? \ TRANS_HDCP2_STREAM_STATUS(trans) : \ - PIPE_HDCP2_STREAM_STATUS(pipe)) + PIPE_HDCP2_STREAM_STATUS(port)) #define _PORTA_HDCP2_AUTH_STREAM 0x66F00 #define _PORTB_HDCP2_AUTH_STREAM 0x66F04 diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 0b1cd4c7a525..025a79fe5920 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -2749,26 +2749,6 @@ oa_configure_all_contexts(struct i915_perf_stream *stream, } static int -gen12_configure_all_contexts(struct i915_perf_stream *stream, - const struct i915_oa_config *oa_config, - struct i915_active *active) -{ - struct flex regs[] = { - { - GEN8_R_PWR_CLK_STATE(RENDER_RING_BASE), - CTX_R_PWR_CLK_STATE, - }, - }; - - if (stream->engine->class != RENDER_CLASS) - return 0; - - return oa_configure_all_contexts(stream, - regs, ARRAY_SIZE(regs), - active); -} - -static int lrc_configure_all_contexts(struct i915_perf_stream *stream, const struct i915_oa_config *oa_config, struct i915_active *active) @@ -2874,7 +2854,6 @@ gen12_enable_metric_set(struct i915_perf_stream *stream, { struct drm_i915_private *i915 = stream->perf->i915; struct intel_uncore *uncore = stream->uncore; - struct i915_oa_config *oa_config = stream->oa_config; bool periodic = stream->periodic; u32 period_exponent = stream->period_exponent; u32 sqcnt1; @@ -2919,15 +2898,6 @@ gen12_enable_metric_set(struct i915_perf_stream *stream, intel_uncore_rmw(uncore, GEN12_SQCNT1, 0, sqcnt1); /* - * Update all contexts prior writing the mux configurations as we need - * to make sure all slices/subslices are ON before writing to NOA - * registers. - */ - ret = gen12_configure_all_contexts(stream, oa_config, active); - if (ret) - return ret; - - /* * For Gen12, performance counters are context * saved/restored. Only enable it for the context that * requested this. @@ -2980,9 +2950,6 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream) _MASKED_BIT_DISABLE(GEN12_DISABLE_DOP_GATING)); } - /* Reset all contexts' slices/subslices configurations. */ - gen12_configure_all_contexts(stream, NULL, NULL); - /* disable the context save/restore or OAR counters */ if (stream->ctx) gen12_configure_oar_context(stream, NULL); diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 0712d0b15170..70fb003a6666 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -898,7 +898,7 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, * Without this the operation can timeout and we'll fallback to a * software copy, which might take several minutes to finish. */ - nouveau_fence_wait(fence, false); + nouveau_fence_wait(fence, false, false); ret = ttm_bo_move_accel_cleanup(bo, &fence->base, evict, false, new_reg); nouveau_fence_unref(&fence); diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c index 66fca95c10c7..7c97b2886807 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.c +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c @@ -72,7 +72,7 @@ nouveau_channel_idle(struct nouveau_channel *chan) ret = nouveau_fence_new(&fence, chan); if (!ret) { - ret = nouveau_fence_wait(fence, false); + ret = nouveau_fence_wait(fence, false, false); nouveau_fence_unref(&fence); } diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 6719353e2e13..6fb65b01d778 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -128,7 +128,7 @@ static void nouveau_dmem_page_free(struct page *page) static void nouveau_dmem_fence_done(struct nouveau_fence **fence) { if (fence) { - nouveau_fence_wait(*fence, false); + nouveau_fence_wait(*fence, true, false); nouveau_fence_unref(fence); } else { /* diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index ba469767a20f..93f08f9479d8 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -311,11 +311,39 @@ nouveau_fence_wait_legacy(struct dma_fence *f, bool intr, long wait) return timeout - t; } +static int +nouveau_fence_wait_busy(struct nouveau_fence *fence, bool intr) +{ + int ret = 0; + + while (!nouveau_fence_done(fence)) { + if (time_after_eq(jiffies, fence->timeout)) { + ret = -EBUSY; + break; + } + + __set_current_state(intr ? + TASK_INTERRUPTIBLE : + TASK_UNINTERRUPTIBLE); + + if (intr && signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + } + + __set_current_state(TASK_RUNNING); + return ret; +} + int -nouveau_fence_wait(struct nouveau_fence *fence, bool intr) +nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr) { long ret; + if (!lazy) + return nouveau_fence_wait_busy(fence, intr); + ret = dma_fence_wait_timeout(&fence->base, intr, 15 * HZ); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h index 1b63197b744a..8bc065acfe35 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.h +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h @@ -23,7 +23,7 @@ void nouveau_fence_unref(struct nouveau_fence **); int nouveau_fence_emit(struct nouveau_fence *); bool nouveau_fence_done(struct nouveau_fence *); -int nouveau_fence_wait(struct nouveau_fence *, bool intr); +int nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr); int nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *, bool exclusive, bool intr); struct nouveau_fence_chan { diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 2e535caa7d6e..5a887d67dc0e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -928,7 +928,7 @@ revalidate: } if (sync) { - if (!(ret = nouveau_fence_wait(fence, false))) { + if (!(ret = nouveau_fence_wait(fence, false, false))) { if ((ret = dma_fence_get_status(&fence->base)) == 1) ret = 0; } diff --git a/drivers/gpu/drm/nouveau/nouveau_prime.c b/drivers/gpu/drm/nouveau/nouveau_prime.c index b58ab595faf8..cd95446d6851 100644 --- a/drivers/gpu/drm/nouveau/nouveau_prime.c +++ b/drivers/gpu/drm/nouveau/nouveau_prime.c @@ -64,7 +64,8 @@ struct drm_gem_object *nouveau_gem_prime_import_sg_table(struct drm_device *dev, * to the caller, instead of a normal nouveau_bo ttm reference. */ ret = drm_gem_object_init(dev, &nvbo->bo.base, size); if (ret) { - nouveau_bo_ref(NULL, &nvbo); + drm_gem_object_release(&nvbo->bo.base); + kfree(nvbo); obj = ERR_PTR(-ENOMEM); goto unlock; } diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c index 9402fa320a7e..48f105239f42 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c @@ -1803,6 +1803,7 @@ nouveau_uvmm_bo_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) { struct nouveau_bo *nvbo = nouveau_gem_object(vm_bo->obj); + nouveau_bo_placement_set(nvbo, nvbo->valid_domains, 0); return nouveau_bo_validate(nvbo, true, false); } diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index 49089eefb7c7..a0febdb6f214 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -565,6 +565,10 @@ void v3d_mmu_insert_ptes(struct v3d_bo *bo); void v3d_mmu_remove_ptes(struct v3d_bo *bo); /* v3d_sched.c */ +void v3d_timestamp_query_info_free(struct v3d_timestamp_query_info *query_info, + unsigned int count); +void v3d_performance_query_info_free(struct v3d_performance_query_info *query_info, + unsigned int count); void v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue); int v3d_sched_init(struct v3d_dev *v3d); void v3d_sched_fini(struct v3d_dev *v3d); diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 271a6d0f5aca..9bd7453b25ad 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -73,24 +73,44 @@ v3d_sched_job_free(struct drm_sched_job *sched_job) v3d_job_cleanup(job); } +void +v3d_timestamp_query_info_free(struct v3d_timestamp_query_info *query_info, + unsigned int count) +{ + if (query_info->queries) { + unsigned int i; + + for (i = 0; i < count; i++) + drm_syncobj_put(query_info->queries[i].syncobj); + + kvfree(query_info->queries); + } +} + +void +v3d_performance_query_info_free(struct v3d_performance_query_info *query_info, + unsigned int count) +{ + if (query_info->queries) { + unsigned int i; + + for (i = 0; i < count; i++) + drm_syncobj_put(query_info->queries[i].syncobj); + + kvfree(query_info->queries); + } +} + static void v3d_cpu_job_free(struct drm_sched_job *sched_job) { struct v3d_cpu_job *job = to_cpu_job(sched_job); - struct v3d_timestamp_query_info *timestamp_query = &job->timestamp_query; - struct v3d_performance_query_info *performance_query = &job->performance_query; - if (timestamp_query->queries) { - for (int i = 0; i < timestamp_query->count; i++) - drm_syncobj_put(timestamp_query->queries[i].syncobj); - kvfree(timestamp_query->queries); - } + v3d_timestamp_query_info_free(&job->timestamp_query, + job->timestamp_query.count); - if (performance_query->queries) { - for (int i = 0; i < performance_query->count; i++) - drm_syncobj_put(performance_query->queries[i].syncobj); - kvfree(performance_query->queries); - } + v3d_performance_query_info_free(&job->performance_query, + job->performance_query.count); v3d_job_cleanup(&job->base); } diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c index 88f63d526b22..4cdfabbf4964 100644 --- a/drivers/gpu/drm/v3d/v3d_submit.c +++ b/drivers/gpu/drm/v3d/v3d_submit.c @@ -452,6 +452,8 @@ v3d_get_cpu_timestamp_query_params(struct drm_file *file_priv, { u32 __user *offsets, *syncs; struct drm_v3d_timestamp_query timestamp; + unsigned int i; + int err; if (!job) { DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); @@ -480,26 +482,34 @@ v3d_get_cpu_timestamp_query_params(struct drm_file *file_priv, offsets = u64_to_user_ptr(timestamp.offsets); syncs = u64_to_user_ptr(timestamp.syncs); - for (int i = 0; i < timestamp.count; i++) { + for (i = 0; i < timestamp.count; i++) { u32 offset, sync; if (copy_from_user(&offset, offsets++, sizeof(offset))) { - kvfree(job->timestamp_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } job->timestamp_query.queries[i].offset = offset; if (copy_from_user(&sync, syncs++, sizeof(sync))) { - kvfree(job->timestamp_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } job->timestamp_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); + if (!job->timestamp_query.queries[i].syncobj) { + err = -ENOENT; + goto error; + } } job->timestamp_query.count = timestamp.count; return 0; + +error: + v3d_timestamp_query_info_free(&job->timestamp_query, i); + return err; } static int @@ -509,6 +519,8 @@ v3d_get_cpu_reset_timestamp_params(struct drm_file *file_priv, { u32 __user *syncs; struct drm_v3d_reset_timestamp_query reset; + unsigned int i; + int err; if (!job) { DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); @@ -533,21 +545,29 @@ v3d_get_cpu_reset_timestamp_params(struct drm_file *file_priv, syncs = u64_to_user_ptr(reset.syncs); - for (int i = 0; i < reset.count; i++) { + for (i = 0; i < reset.count; i++) { u32 sync; job->timestamp_query.queries[i].offset = reset.offset + 8 * i; if (copy_from_user(&sync, syncs++, sizeof(sync))) { - kvfree(job->timestamp_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } job->timestamp_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); + if (!job->timestamp_query.queries[i].syncobj) { + err = -ENOENT; + goto error; + } } job->timestamp_query.count = reset.count; return 0; + +error: + v3d_timestamp_query_info_free(&job->timestamp_query, i); + return err; } /* Get data for the copy timestamp query results job submission. */ @@ -558,7 +578,8 @@ v3d_get_cpu_copy_query_results_params(struct drm_file *file_priv, { u32 __user *offsets, *syncs; struct drm_v3d_copy_timestamp_query copy; - int i; + unsigned int i; + int err; if (!job) { DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); @@ -591,18 +612,22 @@ v3d_get_cpu_copy_query_results_params(struct drm_file *file_priv, u32 offset, sync; if (copy_from_user(&offset, offsets++, sizeof(offset))) { - kvfree(job->timestamp_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } job->timestamp_query.queries[i].offset = offset; if (copy_from_user(&sync, syncs++, sizeof(sync))) { - kvfree(job->timestamp_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } job->timestamp_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); + if (!job->timestamp_query.queries[i].syncobj) { + err = -ENOENT; + goto error; + } } job->timestamp_query.count = copy.count; @@ -613,6 +638,10 @@ v3d_get_cpu_copy_query_results_params(struct drm_file *file_priv, job->copy.stride = copy.stride; return 0; + +error: + v3d_timestamp_query_info_free(&job->timestamp_query, i); + return err; } static int @@ -623,6 +652,8 @@ v3d_get_cpu_reset_performance_params(struct drm_file *file_priv, u32 __user *syncs; u64 __user *kperfmon_ids; struct drm_v3d_reset_performance_query reset; + unsigned int i, j; + int err; if (!job) { DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); @@ -637,6 +668,9 @@ v3d_get_cpu_reset_performance_params(struct drm_file *file_priv, if (copy_from_user(&reset, ext, sizeof(reset))) return -EFAULT; + if (reset.nperfmons > V3D_MAX_PERFMONS) + return -EINVAL; + job->job_type = V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY; job->performance_query.queries = kvmalloc_array(reset.count, @@ -648,39 +682,47 @@ v3d_get_cpu_reset_performance_params(struct drm_file *file_priv, syncs = u64_to_user_ptr(reset.syncs); kperfmon_ids = u64_to_user_ptr(reset.kperfmon_ids); - for (int i = 0; i < reset.count; i++) { + for (i = 0; i < reset.count; i++) { u32 sync; u64 ids; u32 __user *ids_pointer; u32 id; if (copy_from_user(&sync, syncs++, sizeof(sync))) { - kvfree(job->performance_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } - job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); - if (copy_from_user(&ids, kperfmon_ids++, sizeof(ids))) { - kvfree(job->performance_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } ids_pointer = u64_to_user_ptr(ids); - for (int j = 0; j < reset.nperfmons; j++) { + for (j = 0; j < reset.nperfmons; j++) { if (copy_from_user(&id, ids_pointer++, sizeof(id))) { - kvfree(job->performance_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } job->performance_query.queries[i].kperfmon_ids[j] = id; } + + job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); + if (!job->performance_query.queries[i].syncobj) { + err = -ENOENT; + goto error; + } } job->performance_query.count = reset.count; job->performance_query.nperfmons = reset.nperfmons; return 0; + +error: + v3d_performance_query_info_free(&job->performance_query, i); + return err; } static int @@ -691,6 +733,8 @@ v3d_get_cpu_copy_performance_query_params(struct drm_file *file_priv, u32 __user *syncs; u64 __user *kperfmon_ids; struct drm_v3d_copy_performance_query copy; + unsigned int i, j; + int err; if (!job) { DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); @@ -708,6 +752,9 @@ v3d_get_cpu_copy_performance_query_params(struct drm_file *file_priv, if (copy.pad) return -EINVAL; + if (copy.nperfmons > V3D_MAX_PERFMONS) + return -EINVAL; + job->job_type = V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY; job->performance_query.queries = kvmalloc_array(copy.count, @@ -719,34 +766,38 @@ v3d_get_cpu_copy_performance_query_params(struct drm_file *file_priv, syncs = u64_to_user_ptr(copy.syncs); kperfmon_ids = u64_to_user_ptr(copy.kperfmon_ids); - for (int i = 0; i < copy.count; i++) { + for (i = 0; i < copy.count; i++) { u32 sync; u64 ids; u32 __user *ids_pointer; u32 id; if (copy_from_user(&sync, syncs++, sizeof(sync))) { - kvfree(job->performance_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } - job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); - if (copy_from_user(&ids, kperfmon_ids++, sizeof(ids))) { - kvfree(job->performance_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } ids_pointer = u64_to_user_ptr(ids); - for (int j = 0; j < copy.nperfmons; j++) { + for (j = 0; j < copy.nperfmons; j++) { if (copy_from_user(&id, ids_pointer++, sizeof(id))) { - kvfree(job->performance_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } job->performance_query.queries[i].kperfmon_ids[j] = id; } + + job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); + if (!job->performance_query.queries[i].syncobj) { + err = -ENOENT; + goto error; + } } job->performance_query.count = copy.count; job->performance_query.nperfmons = copy.nperfmons; @@ -759,6 +810,10 @@ v3d_get_cpu_copy_performance_query_params(struct drm_file *file_priv, job->copy.stride = copy.stride; return 0; + +error: + v3d_performance_query_info_free(&job->performance_query, i); + return err; } /* Whenever userspace sets ioctl extensions, v3d_get_extensions parses data diff --git a/drivers/gpu/drm/virtio/virtgpu_submit.c b/drivers/gpu/drm/virtio/virtgpu_submit.c index 1c7c7f61a222..7d34cf83f5f2 100644 --- a/drivers/gpu/drm/virtio/virtgpu_submit.c +++ b/drivers/gpu/drm/virtio/virtgpu_submit.c @@ -48,7 +48,7 @@ struct virtio_gpu_submit { static int virtio_gpu_do_fence_wait(struct virtio_gpu_submit *submit, struct dma_fence *in_fence) { - u32 context = submit->fence_ctx + submit->ring_idx; + u64 context = submit->fence_ctx + submit->ring_idx; if (dma_fence_match_context(in_fence, context)) return 0; diff --git a/drivers/gpu/drm/vmwgfx/vmw_surface_cache.h b/drivers/gpu/drm/vmwgfx/vmw_surface_cache.h index b0d87c5f58d8..1ac3cb151b11 100644 --- a/drivers/gpu/drm/vmwgfx/vmw_surface_cache.h +++ b/drivers/gpu/drm/vmwgfx/vmw_surface_cache.h @@ -1,6 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /********************************************************** - * Copyright 2021 VMware, Inc. - * SPDX-License-Identifier: GPL-2.0 OR MIT + * + * Copyright (c) 2021-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -31,6 +33,10 @@ #include <drm/vmwgfx_drm.h> +#define SVGA3D_FLAGS_UPPER_32(svga3d_flags) ((svga3d_flags) >> 32) +#define SVGA3D_FLAGS_LOWER_32(svga3d_flags) \ + ((svga3d_flags) & ((uint64_t)U32_MAX)) + static inline u32 clamped_umul32(u32 a, u32 b) { uint64_t tmp = (uint64_t) a*b; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index 00144632c600..f42ebc4a7c22 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright © 2011-2023 VMware, Inc., Palo Alto, CA., USA - * All Rights Reserved. + * Copyright (c) 2011-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -28,15 +28,39 @@ #include "vmwgfx_bo.h" #include "vmwgfx_drv.h" - +#include "vmwgfx_resource_priv.h" #include <drm/ttm/ttm_placement.h> static void vmw_bo_release(struct vmw_bo *vbo) { + struct vmw_resource *res; + WARN_ON(vbo->tbo.base.funcs && kref_read(&vbo->tbo.base.refcount) != 0); vmw_bo_unmap(vbo); + + xa_destroy(&vbo->detached_resources); + WARN_ON(vbo->is_dumb && !vbo->dumb_surface); + if (vbo->is_dumb && vbo->dumb_surface) { + res = &vbo->dumb_surface->res; + WARN_ON(vbo != res->guest_memory_bo); + WARN_ON(!res->guest_memory_bo); + if (res->guest_memory_bo) { + /* Reserve and switch the backing mob. */ + mutex_lock(&res->dev_priv->cmdbuf_mutex); + (void)vmw_resource_reserve(res, false, true); + vmw_resource_mob_detach(res); + if (res->coherent) + vmw_bo_dirty_release(res->guest_memory_bo); + res->guest_memory_bo = NULL; + res->guest_memory_offset = 0; + vmw_resource_unreserve(res, false, false, false, NULL, + 0); + mutex_unlock(&res->dev_priv->cmdbuf_mutex); + } + vmw_surface_unreference(&vbo->dumb_surface); + } drm_gem_object_release(&vbo->tbo.base); } @@ -326,6 +350,11 @@ void vmw_bo_pin_reserved(struct vmw_bo *vbo, bool pin) */ void *vmw_bo_map_and_cache(struct vmw_bo *vbo) { + return vmw_bo_map_and_cache_size(vbo, vbo->tbo.base.size); +} + +void *vmw_bo_map_and_cache_size(struct vmw_bo *vbo, size_t size) +{ struct ttm_buffer_object *bo = &vbo->tbo; bool not_used; void *virtual; @@ -335,9 +364,10 @@ void *vmw_bo_map_and_cache(struct vmw_bo *vbo) if (virtual) return virtual; - ret = ttm_bo_kmap(bo, 0, PFN_UP(bo->base.size), &vbo->map); + ret = ttm_bo_kmap(bo, 0, PFN_UP(size), &vbo->map); if (ret) - DRM_ERROR("Buffer object map failed: %d.\n", ret); + DRM_ERROR("Buffer object map failed: %d (size: bo = %zu, map = %zu).\n", + ret, bo->base.size, size); return ttm_kmap_obj_virtual(&vbo->map, ¬_used); } @@ -390,6 +420,7 @@ static int vmw_bo_init(struct vmw_private *dev_priv, BUILD_BUG_ON(TTM_MAX_BO_PRIORITY <= 3); vmw_bo->tbo.priority = 3; vmw_bo->res_tree = RB_ROOT; + xa_init(&vmw_bo->detached_resources); params->size = ALIGN(params->size, PAGE_SIZE); drm_gem_private_object_init(vdev, &vmw_bo->tbo.base, params->size); @@ -654,52 +685,6 @@ void vmw_bo_fence_single(struct ttm_buffer_object *bo, dma_fence_put(&fence->base); } - -/** - * vmw_dumb_create - Create a dumb kms buffer - * - * @file_priv: Pointer to a struct drm_file identifying the caller. - * @dev: Pointer to the drm device. - * @args: Pointer to a struct drm_mode_create_dumb structure - * Return: Zero on success, negative error code on failure. - * - * This is a driver callback for the core drm create_dumb functionality. - * Note that this is very similar to the vmw_bo_alloc ioctl, except - * that the arguments have a different format. - */ -int vmw_dumb_create(struct drm_file *file_priv, - struct drm_device *dev, - struct drm_mode_create_dumb *args) -{ - struct vmw_private *dev_priv = vmw_priv(dev); - struct vmw_bo *vbo; - int cpp = DIV_ROUND_UP(args->bpp, 8); - int ret; - - switch (cpp) { - case 1: /* DRM_FORMAT_C8 */ - case 2: /* DRM_FORMAT_RGB565 */ - case 4: /* DRM_FORMAT_XRGB8888 */ - break; - default: - /* - * Dumb buffers don't allow anything else. - * This is tested via IGT's dumb_buffers - */ - return -EINVAL; - } - - args->pitch = args->width * cpp; - args->size = ALIGN(args->pitch * args->height, PAGE_SIZE); - - ret = vmw_gem_object_create_with_handle(dev_priv, file_priv, - args->size, &args->handle, - &vbo); - /* drop reference from allocate - handle holds it now */ - drm_gem_object_put(&vbo->tbo.base); - return ret; -} - /** * vmw_bo_swap_notify - swapout notify callback. * @@ -853,3 +838,43 @@ void vmw_bo_placement_set_default_accelerated(struct vmw_bo *bo) vmw_bo_placement_set(bo, domain, domain); } + +void vmw_bo_add_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res) +{ + xa_store(&vbo->detached_resources, (unsigned long)res, res, GFP_KERNEL); +} + +void vmw_bo_del_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res) +{ + xa_erase(&vbo->detached_resources, (unsigned long)res); +} + +struct vmw_surface *vmw_bo_surface(struct vmw_bo *vbo) +{ + unsigned long index; + struct vmw_resource *res = NULL; + struct vmw_surface *surf = NULL; + struct rb_node *rb_itr = vbo->res_tree.rb_node; + + if (vbo->is_dumb && vbo->dumb_surface) { + res = &vbo->dumb_surface->res; + goto out; + } + + xa_for_each(&vbo->detached_resources, index, res) { + if (res->func->res_type == vmw_res_surface) + goto out; + } + + for (rb_itr = rb_first(&vbo->res_tree); rb_itr; + rb_itr = rb_next(rb_itr)) { + res = rb_entry(rb_itr, struct vmw_resource, mob_node); + if (res->func->res_type == vmw_res_surface) + goto out; + } + +out: + if (res) + surf = vmw_res_to_srf(res); + return surf; +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h index f349642e6190..62b4342d5f7c 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h @@ -1,7 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * - * Copyright 2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2023-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -35,11 +36,13 @@ #include <linux/rbtree_types.h> #include <linux/types.h> +#include <linux/xarray.h> struct vmw_bo_dirty; struct vmw_fence_obj; struct vmw_private; struct vmw_resource; +struct vmw_surface; enum vmw_bo_domain { VMW_BO_DOMAIN_SYS = BIT(0), @@ -85,11 +88,15 @@ struct vmw_bo { struct rb_root res_tree; u32 res_prios[TTM_MAX_BO_PRIORITY]; + struct xarray detached_resources; atomic_t cpu_writers; /* Not ref-counted. Protected by binding_mutex */ struct vmw_resource *dx_query_ctx; struct vmw_bo_dirty *dirty; + + bool is_dumb; + struct vmw_surface *dumb_surface; }; void vmw_bo_placement_set(struct vmw_bo *bo, u32 domain, u32 busy_domain); @@ -124,15 +131,21 @@ void vmw_bo_fence_single(struct ttm_buffer_object *bo, struct vmw_fence_obj *fence); void *vmw_bo_map_and_cache(struct vmw_bo *vbo); +void *vmw_bo_map_and_cache_size(struct vmw_bo *vbo, size_t size); void vmw_bo_unmap(struct vmw_bo *vbo); void vmw_bo_move_notify(struct ttm_buffer_object *bo, struct ttm_resource *mem); void vmw_bo_swap_notify(struct ttm_buffer_object *bo); +void vmw_bo_add_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res); +void vmw_bo_del_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res); +struct vmw_surface *vmw_bo_surface(struct vmw_bo *vbo); + int vmw_user_bo_lookup(struct drm_file *filp, u32 handle, struct vmw_bo **out); + /** * vmw_bo_adjust_prio - Adjust the buffer object eviction priority * according to attached resources diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index a1ce41e1c468..32f50e595809 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -1,7 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * - * Copyright 2009-2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2009-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -763,6 +764,26 @@ extern int vmw_gmr_bind(struct vmw_private *dev_priv, extern void vmw_gmr_unbind(struct vmw_private *dev_priv, int gmr_id); /** + * User handles + */ +struct vmw_user_object { + struct vmw_surface *surface; + struct vmw_bo *buffer; +}; + +int vmw_user_object_lookup(struct vmw_private *dev_priv, struct drm_file *filp, + u32 handle, struct vmw_user_object *uo); +struct vmw_user_object *vmw_user_object_ref(struct vmw_user_object *uo); +void vmw_user_object_unref(struct vmw_user_object *uo); +bool vmw_user_object_is_null(struct vmw_user_object *uo); +struct vmw_surface *vmw_user_object_surface(struct vmw_user_object *uo); +struct vmw_bo *vmw_user_object_buffer(struct vmw_user_object *uo); +void *vmw_user_object_map(struct vmw_user_object *uo); +void *vmw_user_object_map_size(struct vmw_user_object *uo, size_t size); +void vmw_user_object_unmap(struct vmw_user_object *uo); +bool vmw_user_object_is_mapped(struct vmw_user_object *uo); + +/** * Resource utilities - vmwgfx_resource.c */ struct vmw_user_resource_conv; @@ -776,11 +797,6 @@ extern int vmw_resource_validate(struct vmw_resource *res, bool intr, extern int vmw_resource_reserve(struct vmw_resource *res, bool interruptible, bool no_backup); extern bool vmw_resource_needs_backup(const struct vmw_resource *res); -extern int vmw_user_lookup_handle(struct vmw_private *dev_priv, - struct drm_file *filp, - uint32_t handle, - struct vmw_surface **out_surf, - struct vmw_bo **out_buf); extern int vmw_user_resource_lookup_handle( struct vmw_private *dev_priv, struct ttm_object_file *tfile, @@ -1057,9 +1073,6 @@ int vmw_kms_suspend(struct drm_device *dev); int vmw_kms_resume(struct drm_device *dev); void vmw_kms_lost_device(struct drm_device *dev); -int vmw_dumb_create(struct drm_file *file_priv, - struct drm_device *dev, - struct drm_mode_create_dumb *args); extern int vmw_resource_pin(struct vmw_resource *res, bool interruptible); extern void vmw_resource_unpin(struct vmw_resource *res); extern enum vmw_res_type vmw_res_type(const struct vmw_resource *res); @@ -1176,6 +1189,15 @@ extern int vmw_gb_surface_reference_ext_ioctl(struct drm_device *dev, int vmw_gb_surface_define(struct vmw_private *dev_priv, const struct vmw_surface_metadata *req, struct vmw_surface **srf_out); +struct vmw_surface *vmw_lookup_surface_for_buffer(struct vmw_private *vmw, + struct vmw_bo *bo, + u32 handle); +u32 vmw_lookup_surface_handle_for_buffer(struct vmw_private *vmw, + struct vmw_bo *bo, + u32 handle); +int vmw_dumb_create(struct drm_file *file_priv, + struct drm_device *dev, + struct drm_mode_create_dumb *args); /* * Shader management - vmwgfx_shader.c diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index 5efc6a766f64..588d50ababf6 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -32,7 +32,6 @@ #define VMW_FENCE_WRAP (1 << 31) struct vmw_fence_manager { - int num_fence_objects; struct vmw_private *dev_priv; spinlock_t lock; struct list_head fence_list; @@ -124,13 +123,13 @@ static void vmw_fence_obj_destroy(struct dma_fence *f) { struct vmw_fence_obj *fence = container_of(f, struct vmw_fence_obj, base); - struct vmw_fence_manager *fman = fman_from_fence(fence); - spin_lock(&fman->lock); - list_del_init(&fence->head); - --fman->num_fence_objects; - spin_unlock(&fman->lock); + if (!list_empty(&fence->head)) { + spin_lock(&fman->lock); + list_del_init(&fence->head); + spin_unlock(&fman->lock); + } fence->destroy(fence); } @@ -257,7 +256,6 @@ static const struct dma_fence_ops vmw_fence_ops = { .release = vmw_fence_obj_destroy, }; - /* * Execute signal actions on fences recently signaled. * This is done from a workqueue so we don't have to execute @@ -355,7 +353,6 @@ static int vmw_fence_obj_init(struct vmw_fence_manager *fman, goto out_unlock; } list_add_tail(&fence->head, &fman->fence_list); - ++fman->num_fence_objects; out_unlock: spin_unlock(&fman->lock); @@ -403,7 +400,7 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman, u32 passed_seqno) { u32 goal_seqno; - struct vmw_fence_obj *fence; + struct vmw_fence_obj *fence, *next_fence; if (likely(!fman->seqno_valid)) return false; @@ -413,7 +410,7 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman, return false; fman->seqno_valid = false; - list_for_each_entry(fence, &fman->fence_list, head) { + list_for_each_entry_safe(fence, next_fence, &fman->fence_list, head) { if (!list_empty(&fence->seq_passed_actions)) { fman->seqno_valid = true; vmw_fence_goal_write(fman->dev_priv, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c index 07185c108218..b9857f37ca1a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 OR MIT */ /* - * Copyright 2021-2023 VMware, Inc. + * Copyright (c) 2021-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -78,6 +79,59 @@ static struct sg_table *vmw_gem_object_get_sg_table(struct drm_gem_object *obj) return drm_prime_pages_to_sg(obj->dev, vmw_tt->dma_ttm.pages, vmw_tt->dma_ttm.num_pages); } +static int vmw_gem_vmap(struct drm_gem_object *obj, struct iosys_map *map) +{ + struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(obj); + int ret; + + if (obj->import_attach) { + ret = dma_buf_vmap(obj->import_attach->dmabuf, map); + if (!ret) { + if (drm_WARN_ON(obj->dev, map->is_iomem)) { + dma_buf_vunmap(obj->import_attach->dmabuf, map); + return -EIO; + } + } + } else { + ret = ttm_bo_vmap(bo, map); + } + + return ret; +} + +static void vmw_gem_vunmap(struct drm_gem_object *obj, struct iosys_map *map) +{ + if (obj->import_attach) + dma_buf_vunmap(obj->import_attach->dmabuf, map); + else + drm_gem_ttm_vunmap(obj, map); +} + +static int vmw_gem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +{ + int ret; + + if (obj->import_attach) { + /* + * Reset both vm_ops and vm_private_data, so we don't end up with + * vm_ops pointing to our implementation if the dma-buf backend + * doesn't set those fields. + */ + vma->vm_private_data = NULL; + vma->vm_ops = NULL; + + ret = dma_buf_mmap(obj->dma_buf, vma, 0); + + /* Drop the reference drm_gem_mmap_obj() acquired.*/ + if (!ret) + drm_gem_object_put(obj); + + return ret; + } + + return drm_gem_ttm_mmap(obj, vma); +} + static const struct vm_operations_struct vmw_vm_ops = { .pfn_mkwrite = vmw_bo_vm_mkwrite, .page_mkwrite = vmw_bo_vm_mkwrite, @@ -94,9 +148,9 @@ static const struct drm_gem_object_funcs vmw_gem_object_funcs = { .pin = vmw_gem_object_pin, .unpin = vmw_gem_object_unpin, .get_sg_table = vmw_gem_object_get_sg_table, - .vmap = drm_gem_ttm_vmap, - .vunmap = drm_gem_ttm_vunmap, - .mmap = drm_gem_ttm_mmap, + .vmap = vmw_gem_vmap, + .vunmap = vmw_gem_vunmap, + .mmap = vmw_gem_mmap, .vm_ops = &vmw_vm_ops, }; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 00c4ff684130..288ed0bb75cb 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright 2009-2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2009-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -193,13 +194,16 @@ static u32 vmw_du_cursor_mob_size(u32 w, u32 h) */ static u32 *vmw_du_cursor_plane_acquire_image(struct vmw_plane_state *vps) { - if (vps->surf) { - if (vps->surf_mapped) - return vmw_bo_map_and_cache(vps->surf->res.guest_memory_bo); - return vps->surf->snooper.image; - } else if (vps->bo) - return vmw_bo_map_and_cache(vps->bo); - return NULL; + struct vmw_surface *surf; + + if (vmw_user_object_is_null(&vps->uo)) + return NULL; + + surf = vmw_user_object_surface(&vps->uo); + if (surf && !vmw_user_object_is_mapped(&vps->uo)) + return surf->snooper.image; + + return vmw_user_object_map(&vps->uo); } static bool vmw_du_cursor_plane_has_changed(struct vmw_plane_state *old_vps, @@ -536,22 +540,16 @@ void vmw_du_primary_plane_destroy(struct drm_plane *plane) * vmw_du_plane_unpin_surf - unpins resource associated with a framebuffer surface * * @vps: plane state associated with the display surface - * @unreference: true if we also want to unreference the display. */ -void vmw_du_plane_unpin_surf(struct vmw_plane_state *vps, - bool unreference) +void vmw_du_plane_unpin_surf(struct vmw_plane_state *vps) { - if (vps->surf) { + struct vmw_surface *surf = vmw_user_object_surface(&vps->uo); + + if (surf) { if (vps->pinned) { - vmw_resource_unpin(&vps->surf->res); + vmw_resource_unpin(&surf->res); vps->pinned--; } - - if (unreference) { - if (vps->pinned) - DRM_ERROR("Surface still pinned\n"); - vmw_surface_unreference(&vps->surf); - } } } @@ -572,7 +570,7 @@ vmw_du_plane_cleanup_fb(struct drm_plane *plane, { struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state); - vmw_du_plane_unpin_surf(vps, false); + vmw_du_plane_unpin_surf(vps); } @@ -661,25 +659,14 @@ vmw_du_cursor_plane_cleanup_fb(struct drm_plane *plane, struct vmw_cursor_plane *vcp = vmw_plane_to_vcp(plane); struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state); - if (vps->surf_mapped) { - vmw_bo_unmap(vps->surf->res.guest_memory_bo); - vps->surf_mapped = false; - } + if (!vmw_user_object_is_null(&vps->uo)) + vmw_user_object_unmap(&vps->uo); vmw_du_cursor_plane_unmap_cm(vps); vmw_du_put_cursor_mob(vcp, vps); - vmw_du_plane_unpin_surf(vps, false); - - if (vps->surf) { - vmw_surface_unreference(&vps->surf); - vps->surf = NULL; - } - - if (vps->bo) { - vmw_bo_unreference(&vps->bo); - vps->bo = NULL; - } + vmw_du_plane_unpin_surf(vps); + vmw_user_object_unref(&vps->uo); } @@ -698,64 +685,48 @@ vmw_du_cursor_plane_prepare_fb(struct drm_plane *plane, struct drm_framebuffer *fb = new_state->fb; struct vmw_cursor_plane *vcp = vmw_plane_to_vcp(plane); struct vmw_plane_state *vps = vmw_plane_state_to_vps(new_state); + struct vmw_bo *bo = NULL; int ret = 0; - if (vps->surf) { - if (vps->surf_mapped) { - vmw_bo_unmap(vps->surf->res.guest_memory_bo); - vps->surf_mapped = false; - } - vmw_surface_unreference(&vps->surf); - vps->surf = NULL; - } - - if (vps->bo) { - vmw_bo_unreference(&vps->bo); - vps->bo = NULL; + if (!vmw_user_object_is_null(&vps->uo)) { + vmw_user_object_unmap(&vps->uo); + vmw_user_object_unref(&vps->uo); } if (fb) { if (vmw_framebuffer_to_vfb(fb)->bo) { - vps->bo = vmw_framebuffer_to_vfbd(fb)->buffer; - vmw_bo_reference(vps->bo); + vps->uo.buffer = vmw_framebuffer_to_vfbd(fb)->buffer; + vps->uo.surface = NULL; } else { - vps->surf = vmw_framebuffer_to_vfbs(fb)->surface; - vmw_surface_reference(vps->surf); + memcpy(&vps->uo, &vmw_framebuffer_to_vfbs(fb)->uo, sizeof(vps->uo)); } + vmw_user_object_ref(&vps->uo); } - if (!vps->surf && vps->bo) { - const u32 size = new_state->crtc_w * new_state->crtc_h * sizeof(u32); + bo = vmw_user_object_buffer(&vps->uo); + if (bo) { + struct ttm_operation_ctx ctx = {false, false}; - /* - * Not using vmw_bo_map_and_cache() helper here as we need to - * reserve the ttm_buffer_object first which - * vmw_bo_map_and_cache() omits. - */ - ret = ttm_bo_reserve(&vps->bo->tbo, true, false, NULL); - - if (unlikely(ret != 0)) + ret = ttm_bo_reserve(&bo->tbo, true, false, NULL); + if (ret != 0) return -ENOMEM; - ret = ttm_bo_kmap(&vps->bo->tbo, 0, PFN_UP(size), &vps->bo->map); - - ttm_bo_unreserve(&vps->bo->tbo); - - if (unlikely(ret != 0)) + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret != 0) return -ENOMEM; - } else if (vps->surf && !vps->bo && vps->surf->res.guest_memory_bo) { - WARN_ON(vps->surf->snooper.image); - ret = ttm_bo_reserve(&vps->surf->res.guest_memory_bo->tbo, true, false, - NULL); - if (unlikely(ret != 0)) - return -ENOMEM; - vmw_bo_map_and_cache(vps->surf->res.guest_memory_bo); - ttm_bo_unreserve(&vps->surf->res.guest_memory_bo->tbo); - vps->surf_mapped = true; + vmw_bo_pin_reserved(bo, true); + if (vmw_framebuffer_to_vfb(fb)->bo) { + const u32 size = new_state->crtc_w * new_state->crtc_h * sizeof(u32); + + (void)vmw_bo_map_and_cache_size(bo, size); + } else { + vmw_bo_map_and_cache(bo); + } + ttm_bo_unreserve(&bo->tbo); } - if (vps->surf || vps->bo) { + if (!vmw_user_object_is_null(&vps->uo)) { vmw_du_get_cursor_mob(vcp, vps); vmw_du_cursor_plane_map_cm(vps); } @@ -777,14 +748,17 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane, struct vmw_display_unit *du = vmw_crtc_to_du(crtc); struct vmw_plane_state *vps = vmw_plane_state_to_vps(new_state); struct vmw_plane_state *old_vps = vmw_plane_state_to_vps(old_state); + struct vmw_bo *old_bo = NULL; + struct vmw_bo *new_bo = NULL; s32 hotspot_x, hotspot_y; + int ret; hotspot_x = du->hotspot_x + new_state->hotspot_x; hotspot_y = du->hotspot_y + new_state->hotspot_y; - du->cursor_surface = vps->surf; + du->cursor_surface = vmw_user_object_surface(&vps->uo); - if (!vps->surf && !vps->bo) { + if (vmw_user_object_is_null(&vps->uo)) { vmw_cursor_update_position(dev_priv, false, 0, 0); return; } @@ -792,10 +766,26 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane, vps->cursor.hotspot_x = hotspot_x; vps->cursor.hotspot_y = hotspot_y; - if (vps->surf) { + if (du->cursor_surface) du->cursor_age = du->cursor_surface->snooper.age; + + if (!vmw_user_object_is_null(&old_vps->uo)) { + old_bo = vmw_user_object_buffer(&old_vps->uo); + ret = ttm_bo_reserve(&old_bo->tbo, false, false, NULL); + if (ret != 0) + return; } + if (!vmw_user_object_is_null(&vps->uo)) { + new_bo = vmw_user_object_buffer(&vps->uo); + if (old_bo != new_bo) { + ret = ttm_bo_reserve(&new_bo->tbo, false, false, NULL); + if (ret != 0) + return; + } else { + new_bo = NULL; + } + } if (!vmw_du_cursor_plane_has_changed(old_vps, vps)) { /* * If it hasn't changed, avoid making the device do extra @@ -813,6 +803,11 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane, hotspot_x, hotspot_y); } + if (old_bo) + ttm_bo_unreserve(&old_bo->tbo); + if (new_bo) + ttm_bo_unreserve(&new_bo->tbo); + du->cursor_x = new_state->crtc_x + du->set_gui_x; du->cursor_y = new_state->crtc_y + du->set_gui_y; @@ -913,7 +908,7 @@ int vmw_du_cursor_plane_atomic_check(struct drm_plane *plane, } if (!vmw_framebuffer_to_vfb(fb)->bo) { - surface = vmw_framebuffer_to_vfbs(fb)->surface; + surface = vmw_user_object_surface(&vmw_framebuffer_to_vfbs(fb)->uo); WARN_ON(!surface); @@ -1074,12 +1069,7 @@ vmw_du_plane_duplicate_state(struct drm_plane *plane) memset(&vps->cursor, 0, sizeof(vps->cursor)); /* Each ref counted resource needs to be acquired again */ - if (vps->surf) - (void) vmw_surface_reference(vps->surf); - - if (vps->bo) - (void) vmw_bo_reference(vps->bo); - + vmw_user_object_ref(&vps->uo); state = &vps->base; __drm_atomic_helper_plane_duplicate_state(plane, state); @@ -1128,11 +1118,7 @@ vmw_du_plane_destroy_state(struct drm_plane *plane, struct vmw_plane_state *vps = vmw_plane_state_to_vps(state); /* Should have been freed by cleanup_fb */ - if (vps->surf) - vmw_surface_unreference(&vps->surf); - - if (vps->bo) - vmw_bo_unreference(&vps->bo); + vmw_user_object_unref(&vps->uo); drm_atomic_helper_plane_destroy_state(plane, state); } @@ -1227,7 +1213,7 @@ static void vmw_framebuffer_surface_destroy(struct drm_framebuffer *framebuffer) vmw_framebuffer_to_vfbs(framebuffer); drm_framebuffer_cleanup(framebuffer); - vmw_surface_unreference(&vfbs->surface); + vmw_user_object_unref(&vfbs->uo); kfree(vfbs); } @@ -1272,29 +1258,41 @@ int vmw_kms_readback(struct vmw_private *dev_priv, return -ENOSYS; } +static int vmw_framebuffer_surface_create_handle(struct drm_framebuffer *fb, + struct drm_file *file_priv, + unsigned int *handle) +{ + struct vmw_framebuffer_surface *vfbs = vmw_framebuffer_to_vfbs(fb); + struct vmw_bo *bo = vmw_user_object_buffer(&vfbs->uo); + + return drm_gem_handle_create(file_priv, &bo->tbo.base, handle); +} static const struct drm_framebuffer_funcs vmw_framebuffer_surface_funcs = { + .create_handle = vmw_framebuffer_surface_create_handle, .destroy = vmw_framebuffer_surface_destroy, .dirty = drm_atomic_helper_dirtyfb, }; static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv, - struct vmw_surface *surface, + struct vmw_user_object *uo, struct vmw_framebuffer **out, const struct drm_mode_fb_cmd2 - *mode_cmd, - bool is_bo_proxy) + *mode_cmd) { struct drm_device *dev = &dev_priv->drm; struct vmw_framebuffer_surface *vfbs; enum SVGA3dSurfaceFormat format; + struct vmw_surface *surface; int ret; /* 3D is only supported on HWv8 and newer hosts */ if (dev_priv->active_display_unit == vmw_du_legacy) return -ENOSYS; + surface = vmw_user_object_surface(uo); + /* * Sanity checks. */ @@ -1357,8 +1355,8 @@ static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv, } drm_helper_mode_fill_fb_struct(dev, &vfbs->base.base, mode_cmd); - vfbs->surface = vmw_surface_reference(surface); - vfbs->is_bo_proxy = is_bo_proxy; + memcpy(&vfbs->uo, uo, sizeof(vfbs->uo)); + vmw_user_object_ref(&vfbs->uo); *out = &vfbs->base; @@ -1370,7 +1368,7 @@ static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv, return 0; out_err2: - vmw_surface_unreference(&surface); + vmw_user_object_unref(&vfbs->uo); kfree(vfbs); out_err1: return ret; @@ -1386,7 +1384,6 @@ static int vmw_framebuffer_bo_create_handle(struct drm_framebuffer *fb, { struct vmw_framebuffer_bo *vfbd = vmw_framebuffer_to_vfbd(fb); - return drm_gem_handle_create(file_priv, &vfbd->buffer->tbo.base, handle); } @@ -1407,86 +1404,6 @@ static const struct drm_framebuffer_funcs vmw_framebuffer_bo_funcs = { .dirty = drm_atomic_helper_dirtyfb, }; -/** - * vmw_create_bo_proxy - create a proxy surface for the buffer object - * - * @dev: DRM device - * @mode_cmd: parameters for the new surface - * @bo_mob: MOB backing the buffer object - * @srf_out: newly created surface - * - * When the content FB is a buffer object, we create a surface as a proxy to the - * same buffer. This way we can do a surface copy rather than a surface DMA. - * This is a more efficient approach - * - * RETURNS: - * 0 on success, error code otherwise - */ -static int vmw_create_bo_proxy(struct drm_device *dev, - const struct drm_mode_fb_cmd2 *mode_cmd, - struct vmw_bo *bo_mob, - struct vmw_surface **srf_out) -{ - struct vmw_surface_metadata metadata = {0}; - uint32_t format; - struct vmw_resource *res; - unsigned int bytes_pp; - int ret; - - switch (mode_cmd->pixel_format) { - case DRM_FORMAT_ARGB8888: - case DRM_FORMAT_XRGB8888: - format = SVGA3D_X8R8G8B8; - bytes_pp = 4; - break; - - case DRM_FORMAT_RGB565: - case DRM_FORMAT_XRGB1555: - format = SVGA3D_R5G6B5; - bytes_pp = 2; - break; - - case 8: - format = SVGA3D_P8; - bytes_pp = 1; - break; - - default: - DRM_ERROR("Invalid framebuffer format %p4cc\n", - &mode_cmd->pixel_format); - return -EINVAL; - } - - metadata.format = format; - metadata.mip_levels[0] = 1; - metadata.num_sizes = 1; - metadata.base_size.width = mode_cmd->pitches[0] / bytes_pp; - metadata.base_size.height = mode_cmd->height; - metadata.base_size.depth = 1; - metadata.scanout = true; - - ret = vmw_gb_surface_define(vmw_priv(dev), &metadata, srf_out); - if (ret) { - DRM_ERROR("Failed to allocate proxy content buffer\n"); - return ret; - } - - res = &(*srf_out)->res; - - /* Reserve and switch the backing mob. */ - mutex_lock(&res->dev_priv->cmdbuf_mutex); - (void) vmw_resource_reserve(res, false, true); - vmw_user_bo_unref(&res->guest_memory_bo); - res->guest_memory_bo = vmw_user_bo_ref(bo_mob); - res->guest_memory_offset = 0; - vmw_resource_unreserve(res, false, false, false, NULL, 0); - mutex_unlock(&res->dev_priv->cmdbuf_mutex); - - return 0; -} - - - static int vmw_kms_new_framebuffer_bo(struct vmw_private *dev_priv, struct vmw_bo *bo, struct vmw_framebuffer **out, @@ -1565,55 +1482,24 @@ vmw_kms_srf_ok(struct vmw_private *dev_priv, uint32_t width, uint32_t height) * vmw_kms_new_framebuffer - Create a new framebuffer. * * @dev_priv: Pointer to device private struct. - * @bo: Pointer to buffer object to wrap the kms framebuffer around. - * Either @bo or @surface must be NULL. - * @surface: Pointer to a surface to wrap the kms framebuffer around. - * Either @bo or @surface must be NULL. - * @only_2d: No presents will occur to this buffer object based framebuffer. - * This helps the code to do some important optimizations. + * @uo: Pointer to user object to wrap the kms framebuffer around. + * Either the buffer or surface inside the user object must be NULL. * @mode_cmd: Frame-buffer metadata. */ struct vmw_framebuffer * vmw_kms_new_framebuffer(struct vmw_private *dev_priv, - struct vmw_bo *bo, - struct vmw_surface *surface, - bool only_2d, + struct vmw_user_object *uo, const struct drm_mode_fb_cmd2 *mode_cmd) { struct vmw_framebuffer *vfb = NULL; - bool is_bo_proxy = false; int ret; - /* - * We cannot use the SurfaceDMA command in an non-accelerated VM, - * therefore, wrap the buffer object in a surface so we can use the - * SurfaceCopy command. - */ - if (vmw_kms_srf_ok(dev_priv, mode_cmd->width, mode_cmd->height) && - bo && only_2d && - mode_cmd->width > 64 && /* Don't create a proxy for cursor */ - dev_priv->active_display_unit == vmw_du_screen_target) { - ret = vmw_create_bo_proxy(&dev_priv->drm, mode_cmd, - bo, &surface); - if (ret) - return ERR_PTR(ret); - - is_bo_proxy = true; - } - /* Create the new framebuffer depending one what we have */ - if (surface) { - ret = vmw_kms_new_framebuffer_surface(dev_priv, surface, &vfb, - mode_cmd, - is_bo_proxy); - /* - * vmw_create_bo_proxy() adds a reference that is no longer - * needed - */ - if (is_bo_proxy) - vmw_surface_unreference(&surface); - } else if (bo) { - ret = vmw_kms_new_framebuffer_bo(dev_priv, bo, &vfb, + if (vmw_user_object_surface(uo)) { + ret = vmw_kms_new_framebuffer_surface(dev_priv, uo, &vfb, + mode_cmd); + } else if (uo->buffer) { + ret = vmw_kms_new_framebuffer_bo(dev_priv, uo->buffer, &vfb, mode_cmd); } else { BUG(); @@ -1635,14 +1521,12 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev, { struct vmw_private *dev_priv = vmw_priv(dev); struct vmw_framebuffer *vfb = NULL; - struct vmw_surface *surface = NULL; - struct vmw_bo *bo = NULL; + struct vmw_user_object uo = {0}; int ret; /* returns either a bo or surface */ - ret = vmw_user_lookup_handle(dev_priv, file_priv, - mode_cmd->handles[0], - &surface, &bo); + ret = vmw_user_object_lookup(dev_priv, file_priv, mode_cmd->handles[0], + &uo); if (ret) { DRM_ERROR("Invalid buffer object handle %u (0x%x).\n", mode_cmd->handles[0], mode_cmd->handles[0]); @@ -1650,7 +1534,7 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev, } - if (!bo && + if (vmw_user_object_surface(&uo) && !vmw_kms_srf_ok(dev_priv, mode_cmd->width, mode_cmd->height)) { DRM_ERROR("Surface size cannot exceed %dx%d\n", dev_priv->texture_max_width, @@ -1659,20 +1543,15 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev, } - vfb = vmw_kms_new_framebuffer(dev_priv, bo, surface, - !(dev_priv->capabilities & SVGA_CAP_3D), - mode_cmd); + vfb = vmw_kms_new_framebuffer(dev_priv, &uo, mode_cmd); if (IS_ERR(vfb)) { ret = PTR_ERR(vfb); goto err_out; } err_out: - /* vmw_user_lookup_handle takes one ref so does new_fb */ - if (bo) - vmw_user_bo_unref(&bo); - if (surface) - vmw_surface_unreference(&surface); + /* vmw_user_object_lookup takes one ref so does new_fb */ + vmw_user_object_unref(&uo); if (ret) { DRM_ERROR("failed to create vmw_framebuffer: %i\n", ret); @@ -2585,72 +2464,6 @@ void vmw_kms_helper_validation_finish(struct vmw_private *dev_priv, } /** - * vmw_kms_update_proxy - Helper function to update a proxy surface from - * its backing MOB. - * - * @res: Pointer to the surface resource - * @clips: Clip rects in framebuffer (surface) space. - * @num_clips: Number of clips in @clips. - * @increment: Integer with which to increment the clip counter when looping. - * Used to skip a predetermined number of clip rects. - * - * This function makes sure the proxy surface is updated from its backing MOB - * using the region given by @clips. The surface resource @res and its backing - * MOB needs to be reserved and validated on call. - */ -int vmw_kms_update_proxy(struct vmw_resource *res, - const struct drm_clip_rect *clips, - unsigned num_clips, - int increment) -{ - struct vmw_private *dev_priv = res->dev_priv; - struct drm_vmw_size *size = &vmw_res_to_srf(res)->metadata.base_size; - struct { - SVGA3dCmdHeader header; - SVGA3dCmdUpdateGBImage body; - } *cmd; - SVGA3dBox *box; - size_t copy_size = 0; - int i; - - if (!clips) - return 0; - - cmd = VMW_CMD_RESERVE(dev_priv, sizeof(*cmd) * num_clips); - if (!cmd) - return -ENOMEM; - - for (i = 0; i < num_clips; ++i, clips += increment, ++cmd) { - box = &cmd->body.box; - - cmd->header.id = SVGA_3D_CMD_UPDATE_GB_IMAGE; - cmd->header.size = sizeof(cmd->body); - cmd->body.image.sid = res->id; - cmd->body.image.face = 0; - cmd->body.image.mipmap = 0; - - if (clips->x1 > size->width || clips->x2 > size->width || - clips->y1 > size->height || clips->y2 > size->height) { - DRM_ERROR("Invalid clips outsize of framebuffer.\n"); - return -EINVAL; - } - - box->x = clips->x1; - box->y = clips->y1; - box->z = 0; - box->w = clips->x2 - clips->x1; - box->h = clips->y2 - clips->y1; - box->d = 1; - - copy_size += sizeof(*cmd); - } - - vmw_cmd_commit(dev_priv, copy_size); - - return 0; -} - -/** * vmw_kms_create_implicit_placement_property - Set up the implicit placement * property. * @@ -2784,8 +2597,9 @@ int vmw_du_helper_plane_update(struct vmw_du_update_plane *update) } else { struct vmw_framebuffer_surface *vfbs = container_of(update->vfb, typeof(*vfbs), base); + struct vmw_surface *surf = vmw_user_object_surface(&vfbs->uo); - ret = vmw_validation_add_resource(&val_ctx, &vfbs->surface->res, + ret = vmw_validation_add_resource(&val_ctx, &surf->res, 0, VMW_RES_DIRTY_NONE, NULL, NULL); } @@ -2941,3 +2755,93 @@ int vmw_connector_get_modes(struct drm_connector *connector) return num_modes; } + +struct vmw_user_object *vmw_user_object_ref(struct vmw_user_object *uo) +{ + if (uo->buffer) + vmw_user_bo_ref(uo->buffer); + else if (uo->surface) + vmw_surface_reference(uo->surface); + return uo; +} + +void vmw_user_object_unref(struct vmw_user_object *uo) +{ + if (uo->buffer) + vmw_user_bo_unref(&uo->buffer); + else if (uo->surface) + vmw_surface_unreference(&uo->surface); +} + +struct vmw_bo * +vmw_user_object_buffer(struct vmw_user_object *uo) +{ + if (uo->buffer) + return uo->buffer; + else if (uo->surface) + return uo->surface->res.guest_memory_bo; + return NULL; +} + +struct vmw_surface * +vmw_user_object_surface(struct vmw_user_object *uo) +{ + if (uo->buffer) + return uo->buffer->dumb_surface; + return uo->surface; +} + +void *vmw_user_object_map(struct vmw_user_object *uo) +{ + struct vmw_bo *bo = vmw_user_object_buffer(uo); + + WARN_ON(!bo); + return vmw_bo_map_and_cache(bo); +} + +void *vmw_user_object_map_size(struct vmw_user_object *uo, size_t size) +{ + struct vmw_bo *bo = vmw_user_object_buffer(uo); + + WARN_ON(!bo); + return vmw_bo_map_and_cache_size(bo, size); +} + +void vmw_user_object_unmap(struct vmw_user_object *uo) +{ + struct vmw_bo *bo = vmw_user_object_buffer(uo); + int ret; + + WARN_ON(!bo); + + /* Fence the mob creation so we are guarateed to have the mob */ + ret = ttm_bo_reserve(&bo->tbo, false, false, NULL); + if (ret != 0) + return; + + vmw_bo_unmap(bo); + vmw_bo_pin_reserved(bo, false); + + ttm_bo_unreserve(&bo->tbo); +} + +bool vmw_user_object_is_mapped(struct vmw_user_object *uo) +{ + struct vmw_bo *bo; + + if (!uo || vmw_user_object_is_null(uo)) + return false; + + bo = vmw_user_object_buffer(uo); + + if (WARN_ON(!bo)) + return false; + + WARN_ON(bo->map.bo && !bo->map.virtual); + return bo->map.virtual; +} + +bool vmw_user_object_is_null(struct vmw_user_object *uo) +{ + return !uo->buffer && !uo->surface; +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h index bf24f2f0dcfc..6141fadf81ef 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h @@ -1,7 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * - * Copyright 2009-2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2009-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -221,11 +222,9 @@ struct vmw_framebuffer { struct vmw_framebuffer_surface { struct vmw_framebuffer base; - struct vmw_surface *surface; - bool is_bo_proxy; /* true if this is proxy surface for DMA buf */ + struct vmw_user_object uo; }; - struct vmw_framebuffer_bo { struct vmw_framebuffer base; struct vmw_bo *buffer; @@ -277,8 +276,7 @@ struct vmw_cursor_plane_state { */ struct vmw_plane_state { struct drm_plane_state base; - struct vmw_surface *surf; - struct vmw_bo *bo; + struct vmw_user_object uo; int content_fb_type; unsigned long bo_size; @@ -457,9 +455,7 @@ int vmw_kms_readback(struct vmw_private *dev_priv, uint32_t num_clips); struct vmw_framebuffer * vmw_kms_new_framebuffer(struct vmw_private *dev_priv, - struct vmw_bo *bo, - struct vmw_surface *surface, - bool only_2d, + struct vmw_user_object *uo, const struct drm_mode_fb_cmd2 *mode_cmd); void vmw_guess_mode_timing(struct drm_display_mode *mode); void vmw_kms_update_implicit_fb(struct vmw_private *dev_priv); @@ -486,8 +482,7 @@ void vmw_du_plane_reset(struct drm_plane *plane); struct drm_plane_state *vmw_du_plane_duplicate_state(struct drm_plane *plane); void vmw_du_plane_destroy_state(struct drm_plane *plane, struct drm_plane_state *state); -void vmw_du_plane_unpin_surf(struct vmw_plane_state *vps, - bool unreference); +void vmw_du_plane_unpin_surf(struct vmw_plane_state *vps); int vmw_du_crtc_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c index 5befc2719a49..39949e0a493f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright 2009-2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2009-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -147,8 +148,9 @@ static int vmw_ldu_fb_pin(struct vmw_framebuffer *vfb) struct vmw_bo *buf; int ret; - buf = vfb->bo ? vmw_framebuffer_to_vfbd(&vfb->base)->buffer : - vmw_framebuffer_to_vfbs(&vfb->base)->surface->res.guest_memory_bo; + buf = vfb->bo ? + vmw_framebuffer_to_vfbd(&vfb->base)->buffer : + vmw_user_object_buffer(&vmw_framebuffer_to_vfbs(&vfb->base)->uo); if (!buf) return 0; @@ -169,8 +171,10 @@ static int vmw_ldu_fb_unpin(struct vmw_framebuffer *vfb) struct vmw_private *dev_priv = vmw_priv(vfb->base.dev); struct vmw_bo *buf; - buf = vfb->bo ? vmw_framebuffer_to_vfbd(&vfb->base)->buffer : - vmw_framebuffer_to_vfbs(&vfb->base)->surface->res.guest_memory_bo; + buf = vfb->bo ? + vmw_framebuffer_to_vfbd(&vfb->base)->buffer : + vmw_user_object_buffer(&vmw_framebuffer_to_vfbs(&vfb->base)->uo); + if (WARN_ON(!buf)) return 0; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c index c45b4724e414..e20f64b67b26 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c @@ -92,7 +92,7 @@ static int vmw_overlay_send_put(struct vmw_private *dev_priv, { struct vmw_escape_video_flush *flush; size_t fifo_size; - bool have_so = (dev_priv->active_display_unit == vmw_du_screen_object); + bool have_so = (dev_priv->active_display_unit != vmw_du_legacy); int i, num_items; SVGAGuestPtr ptr; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c b/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c index c99cad444991..598b90ac7590 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright 2013 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2013-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -31,6 +32,7 @@ */ #include "vmwgfx_drv.h" +#include "vmwgfx_bo.h" #include "ttm_object.h" #include <linux/dma-buf.h> @@ -88,13 +90,35 @@ int vmw_prime_handle_to_fd(struct drm_device *dev, uint32_t handle, uint32_t flags, int *prime_fd) { + struct vmw_private *vmw = vmw_priv(dev); struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; + struct vmw_bo *vbo; int ret; + int surf_handle; - if (handle > VMWGFX_NUM_MOB) + if (handle > VMWGFX_NUM_MOB) { ret = ttm_prime_handle_to_fd(tfile, handle, flags, prime_fd); - else - ret = drm_gem_prime_handle_to_fd(dev, file_priv, handle, flags, prime_fd); + } else { + ret = vmw_user_bo_lookup(file_priv, handle, &vbo); + if (ret) + return ret; + if (vbo && vbo->is_dumb) { + ret = drm_gem_prime_handle_to_fd(dev, file_priv, handle, + flags, prime_fd); + } else { + surf_handle = vmw_lookup_surface_handle_for_buffer(vmw, + vbo, + handle); + if (surf_handle > 0) + ret = ttm_prime_handle_to_fd(tfile, surf_handle, + flags, prime_fd); + else + ret = drm_gem_prime_handle_to_fd(dev, file_priv, + handle, flags, + prime_fd); + } + vmw_user_bo_unref(&vbo); + } return ret; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 848dba09981b..a73af8a355fb 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright 2009-2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2009-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -58,6 +59,7 @@ void vmw_resource_mob_attach(struct vmw_resource *res) rb_link_node(&res->mob_node, parent, new); rb_insert_color(&res->mob_node, &gbo->res_tree); + vmw_bo_del_detached_resource(gbo, res); vmw_bo_prio_add(gbo, res->used_prio); } @@ -287,28 +289,35 @@ out_bad_resource: * * The pointer this pointed at by out_surf and out_buf needs to be null. */ -int vmw_user_lookup_handle(struct vmw_private *dev_priv, +int vmw_user_object_lookup(struct vmw_private *dev_priv, struct drm_file *filp, - uint32_t handle, - struct vmw_surface **out_surf, - struct vmw_bo **out_buf) + u32 handle, + struct vmw_user_object *uo) { struct ttm_object_file *tfile = vmw_fpriv(filp)->tfile; struct vmw_resource *res; int ret; - BUG_ON(*out_surf || *out_buf); + WARN_ON(uo->surface || uo->buffer); ret = vmw_user_resource_lookup_handle(dev_priv, tfile, handle, user_surface_converter, &res); if (!ret) { - *out_surf = vmw_res_to_srf(res); + uo->surface = vmw_res_to_srf(res); return 0; } - *out_surf = NULL; - ret = vmw_user_bo_lookup(filp, handle, out_buf); + uo->surface = NULL; + ret = vmw_user_bo_lookup(filp, handle, &uo->buffer); + if (!ret && !uo->buffer->is_dumb) { + uo->surface = vmw_lookup_surface_for_buffer(dev_priv, + uo->buffer, + handle); + if (uo->surface) + vmw_user_bo_unref(&uo->buffer); + } + return ret; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c index df0039a8ef29..0f4bfd98480a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright 2011-2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2011-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -240,7 +241,7 @@ static void vmw_sou_crtc_mode_set_nofb(struct drm_crtc *crtc) struct vmw_connector_state *vmw_conn_state; int x, y; - sou->buffer = vps->bo; + sou->buffer = vmw_user_object_buffer(&vps->uo); conn_state = sou->base.connector.state; vmw_conn_state = vmw_connector_state_to_vcs(conn_state); @@ -376,10 +377,11 @@ vmw_sou_primary_plane_cleanup_fb(struct drm_plane *plane, struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state); struct drm_crtc *crtc = plane->state->crtc ? plane->state->crtc : old_state->crtc; + struct vmw_bo *bo = vmw_user_object_buffer(&vps->uo); - if (vps->bo) - vmw_bo_unpin(vmw_priv(crtc->dev), vps->bo, false); - vmw_bo_unreference(&vps->bo); + if (bo) + vmw_bo_unpin(vmw_priv(crtc->dev), bo, false); + vmw_user_object_unref(&vps->uo); vps->bo_size = 0; vmw_du_plane_cleanup_fb(plane, old_state); @@ -411,9 +413,10 @@ vmw_sou_primary_plane_prepare_fb(struct drm_plane *plane, .bo_type = ttm_bo_type_device, .pin = true }; + struct vmw_bo *bo = NULL; if (!new_fb) { - vmw_bo_unreference(&vps->bo); + vmw_user_object_unref(&vps->uo); vps->bo_size = 0; return 0; @@ -422,17 +425,17 @@ vmw_sou_primary_plane_prepare_fb(struct drm_plane *plane, bo_params.size = new_state->crtc_w * new_state->crtc_h * 4; dev_priv = vmw_priv(crtc->dev); - if (vps->bo) { + bo = vmw_user_object_buffer(&vps->uo); + if (bo) { if (vps->bo_size == bo_params.size) { /* * Note that this might temporarily up the pin-count * to 2, until cleanup_fb() is called. */ - return vmw_bo_pin_in_vram(dev_priv, vps->bo, - true); + return vmw_bo_pin_in_vram(dev_priv, bo, true); } - vmw_bo_unreference(&vps->bo); + vmw_user_object_unref(&vps->uo); vps->bo_size = 0; } @@ -442,7 +445,7 @@ vmw_sou_primary_plane_prepare_fb(struct drm_plane *plane, * resume the overlays, this is preferred to failing to alloc. */ vmw_overlay_pause_all(dev_priv); - ret = vmw_bo_create(dev_priv, &bo_params, &vps->bo); + ret = vmw_gem_object_create(dev_priv, &bo_params, &vps->uo.buffer); vmw_overlay_resume_all(dev_priv); if (ret) return ret; @@ -453,7 +456,7 @@ vmw_sou_primary_plane_prepare_fb(struct drm_plane *plane, * TTM already thinks the buffer is pinned, but make sure the * pin_count is upped. */ - return vmw_bo_pin_in_vram(dev_priv, vps->bo, true); + return vmw_bo_pin_in_vram(dev_priv, vps->uo.buffer, true); } static uint32_t vmw_sou_bo_fifo_size(struct vmw_du_update_plane *update, @@ -580,6 +583,7 @@ static uint32_t vmw_sou_surface_pre_clip(struct vmw_du_update_plane *update, { struct vmw_kms_sou_dirty_cmd *blit = cmd; struct vmw_framebuffer_surface *vfbs; + struct vmw_surface *surf = NULL; vfbs = container_of(update->vfb, typeof(*vfbs), base); @@ -587,7 +591,8 @@ static uint32_t vmw_sou_surface_pre_clip(struct vmw_du_update_plane *update, blit->header.size = sizeof(blit->body) + sizeof(SVGASignedRect) * num_hits; - blit->body.srcImage.sid = vfbs->surface->res.id; + surf = vmw_user_object_surface(&vfbs->uo); + blit->body.srcImage.sid = surf->res.id; blit->body.destScreenId = update->du->unit; /* Update the source and destination bounding box later in post_clip */ @@ -1104,7 +1109,7 @@ int vmw_kms_sou_do_surface_dirty(struct vmw_private *dev_priv, int ret; if (!srf) - srf = &vfbs->surface->res; + srf = &vmw_user_object_surface(&vfbs->uo)->res; ret = vmw_validation_add_resource(&val_ctx, srf, 0, VMW_RES_DIRTY_NONE, NULL, NULL); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c index a04e0736318d..5453f7cf0e2d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /****************************************************************************** * - * COPYRIGHT (C) 2014-2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2014-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -29,6 +30,7 @@ #include "vmwgfx_kms.h" #include "vmwgfx_vkms.h" #include "vmw_surface_cache.h" +#include <linux/fsnotify.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> @@ -735,7 +737,7 @@ int vmw_kms_stdu_surface_dirty(struct vmw_private *dev_priv, int ret; if (!srf) - srf = &vfbs->surface->res; + srf = &vmw_user_object_surface(&vfbs->uo)->res; ret = vmw_validation_add_resource(&val_ctx, srf, 0, VMW_RES_DIRTY_NONE, NULL, NULL); @@ -746,12 +748,6 @@ int vmw_kms_stdu_surface_dirty(struct vmw_private *dev_priv, if (ret) goto out_unref; - if (vfbs->is_bo_proxy) { - ret = vmw_kms_update_proxy(srf, clips, num_clips, inc); - if (ret) - goto out_finish; - } - sdirty.base.fifo_commit = vmw_kms_stdu_surface_fifo_commit; sdirty.base.clip = vmw_kms_stdu_surface_clip; sdirty.base.fifo_reserve_size = sizeof(struct vmw_stdu_surface_copy) + @@ -765,7 +761,7 @@ int vmw_kms_stdu_surface_dirty(struct vmw_private *dev_priv, ret = vmw_kms_helper_dirty(dev_priv, framebuffer, clips, vclips, dest_x, dest_y, num_clips, inc, &sdirty.base); -out_finish: + vmw_kms_helper_validation_finish(dev_priv, NULL, &val_ctx, out_fence, NULL); @@ -877,6 +873,32 @@ vmw_stdu_connector_mode_valid(struct drm_connector *connector, return MODE_OK; } +/* + * Trigger a modeset if the X,Y position of the Screen Target changes. + * This is needed when multi-mon is cycled. The original Screen Target will have + * the same mode but its relative X,Y position in the topology will change. + */ +static int vmw_stdu_connector_atomic_check(struct drm_connector *conn, + struct drm_atomic_state *state) +{ + struct drm_connector_state *conn_state; + struct vmw_screen_target_display_unit *du; + struct drm_crtc_state *new_crtc_state; + + conn_state = drm_atomic_get_connector_state(state, conn); + du = vmw_connector_to_stdu(conn); + + if (!conn_state->crtc) + return 0; + + new_crtc_state = drm_atomic_get_new_crtc_state(state, conn_state->crtc); + if (du->base.gui_x != du->base.set_gui_x || + du->base.gui_y != du->base.set_gui_y) + new_crtc_state->mode_changed = true; + + return 0; +} + static const struct drm_connector_funcs vmw_stdu_connector_funcs = { .dpms = vmw_du_connector_dpms, .detect = vmw_du_connector_detect, @@ -891,7 +913,8 @@ static const struct drm_connector_funcs vmw_stdu_connector_funcs = { static const struct drm_connector_helper_funcs vmw_stdu_connector_helper_funcs = { .get_modes = vmw_connector_get_modes, - .mode_valid = vmw_stdu_connector_mode_valid + .mode_valid = vmw_stdu_connector_mode_valid, + .atomic_check = vmw_stdu_connector_atomic_check, }; @@ -918,9 +941,8 @@ vmw_stdu_primary_plane_cleanup_fb(struct drm_plane *plane, { struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state); - if (vps->surf) + if (vmw_user_object_surface(&vps->uo)) WARN_ON(!vps->pinned); - vmw_du_plane_cleanup_fb(plane, old_state); vps->content_fb_type = SAME_AS_DISPLAY; @@ -928,7 +950,6 @@ vmw_stdu_primary_plane_cleanup_fb(struct drm_plane *plane, } - /** * vmw_stdu_primary_plane_prepare_fb - Readies the display surface * @@ -952,13 +973,15 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane, enum stdu_content_type new_content_type; struct vmw_framebuffer_surface *new_vfbs; uint32_t hdisplay = new_state->crtc_w, vdisplay = new_state->crtc_h; + struct drm_plane_state *old_state = plane->state; + struct drm_rect rect; int ret; /* No FB to prepare */ if (!new_fb) { - if (vps->surf) { + if (vmw_user_object_surface(&vps->uo)) { WARN_ON(vps->pinned != 0); - vmw_surface_unreference(&vps->surf); + vmw_user_object_unref(&vps->uo); } return 0; @@ -968,8 +991,8 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane, new_vfbs = (vfb->bo) ? NULL : vmw_framebuffer_to_vfbs(new_fb); if (new_vfbs && - new_vfbs->surface->metadata.base_size.width == hdisplay && - new_vfbs->surface->metadata.base_size.height == vdisplay) + vmw_user_object_surface(&new_vfbs->uo)->metadata.base_size.width == hdisplay && + vmw_user_object_surface(&new_vfbs->uo)->metadata.base_size.height == vdisplay) new_content_type = SAME_AS_DISPLAY; else if (vfb->bo) new_content_type = SEPARATE_BO; @@ -1007,29 +1030,29 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane, metadata.num_sizes = 1; metadata.scanout = true; } else { - metadata = new_vfbs->surface->metadata; + metadata = vmw_user_object_surface(&new_vfbs->uo)->metadata; } metadata.base_size.width = hdisplay; metadata.base_size.height = vdisplay; metadata.base_size.depth = 1; - if (vps->surf) { + if (vmw_user_object_surface(&vps->uo)) { struct drm_vmw_size cur_base_size = - vps->surf->metadata.base_size; + vmw_user_object_surface(&vps->uo)->metadata.base_size; if (cur_base_size.width != metadata.base_size.width || cur_base_size.height != metadata.base_size.height || - vps->surf->metadata.format != metadata.format) { + vmw_user_object_surface(&vps->uo)->metadata.format != metadata.format) { WARN_ON(vps->pinned != 0); - vmw_surface_unreference(&vps->surf); + vmw_user_object_unref(&vps->uo); } } - if (!vps->surf) { + if (!vmw_user_object_surface(&vps->uo)) { ret = vmw_gb_surface_define(dev_priv, &metadata, - &vps->surf); + &vps->uo.surface); if (ret != 0) { DRM_ERROR("Couldn't allocate STDU surface.\n"); return ret; @@ -1042,18 +1065,19 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane, * The only time we add a reference in prepare_fb is if the * state object doesn't have a reference to begin with */ - if (vps->surf) { + if (vmw_user_object_surface(&vps->uo)) { WARN_ON(vps->pinned != 0); - vmw_surface_unreference(&vps->surf); + vmw_user_object_unref(&vps->uo); } - vps->surf = vmw_surface_reference(new_vfbs->surface); + memcpy(&vps->uo, &new_vfbs->uo, sizeof(vps->uo)); + vmw_user_object_ref(&vps->uo); } - if (vps->surf) { + if (vmw_user_object_surface(&vps->uo)) { /* Pin new surface before flipping */ - ret = vmw_resource_pin(&vps->surf->res, false); + ret = vmw_resource_pin(&vmw_user_object_surface(&vps->uo)->res, false); if (ret) goto out_srf_unref; @@ -1063,6 +1087,34 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane, vps->content_fb_type = new_content_type; /* + * The drm fb code will do blit's via the vmap interface, which doesn't + * trigger vmw_bo page dirty tracking due to being kernel side (and thus + * doesn't require mmap'ing) so we have to update the surface's dirty + * regions by hand but we want to be careful to not overwrite the + * resource if it has been written to by the gpu (res_dirty). + */ + if (vps->uo.buffer && vps->uo.buffer->is_dumb) { + struct vmw_surface *surf = vmw_user_object_surface(&vps->uo); + struct vmw_resource *res = &surf->res; + + if (!res->res_dirty && drm_atomic_helper_damage_merged(old_state, + new_state, + &rect)) { + /* + * At some point it might be useful to actually translate + * (rect.x1, rect.y1) => start, and (rect.x2, rect.y2) => end, + * but currently the fb code will just report the entire fb + * dirty so in practice it doesn't matter. + */ + pgoff_t start = res->guest_memory_offset >> PAGE_SHIFT; + pgoff_t end = __KERNEL_DIV_ROUND_UP(res->guest_memory_offset + + res->guest_memory_size, + PAGE_SIZE); + vmw_resource_dirty_update(res, start, end); + } + } + + /* * This should only happen if the buffer object is too large to create a * proxy surface for. */ @@ -1072,7 +1124,7 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane, return 0; out_srf_unref: - vmw_surface_unreference(&vps->surf); + vmw_user_object_unref(&vps->uo); return ret; } @@ -1214,14 +1266,8 @@ static uint32_t vmw_stdu_surface_fifo_size_same_display(struct vmw_du_update_plane *update, uint32_t num_hits) { - struct vmw_framebuffer_surface *vfbs; uint32_t size = 0; - vfbs = container_of(update->vfb, typeof(*vfbs), base); - - if (vfbs->is_bo_proxy) - size += sizeof(struct vmw_stdu_update_gb_image) * num_hits; - size += sizeof(struct vmw_stdu_update); return size; @@ -1230,14 +1276,8 @@ vmw_stdu_surface_fifo_size_same_display(struct vmw_du_update_plane *update, static uint32_t vmw_stdu_surface_fifo_size(struct vmw_du_update_plane *update, uint32_t num_hits) { - struct vmw_framebuffer_surface *vfbs; uint32_t size = 0; - vfbs = container_of(update->vfb, typeof(*vfbs), base); - - if (vfbs->is_bo_proxy) - size += sizeof(struct vmw_stdu_update_gb_image) * num_hits; - size += sizeof(struct vmw_stdu_surface_copy) + sizeof(SVGA3dCopyBox) * num_hits + sizeof(struct vmw_stdu_update); @@ -1245,47 +1285,6 @@ static uint32_t vmw_stdu_surface_fifo_size(struct vmw_du_update_plane *update, } static uint32_t -vmw_stdu_surface_update_proxy(struct vmw_du_update_plane *update, void *cmd) -{ - struct vmw_framebuffer_surface *vfbs; - struct drm_plane_state *state = update->plane->state; - struct drm_plane_state *old_state = update->old_state; - struct vmw_stdu_update_gb_image *cmd_update = cmd; - struct drm_atomic_helper_damage_iter iter; - struct drm_rect clip; - uint32_t copy_size = 0; - - vfbs = container_of(update->vfb, typeof(*vfbs), base); - - /* - * proxy surface is special where a buffer object type fb is wrapped - * in a surface and need an update gb image command to sync with device. - */ - drm_atomic_helper_damage_iter_init(&iter, old_state, state); - drm_atomic_for_each_plane_damage(&iter, &clip) { - SVGA3dBox *box = &cmd_update->body.box; - - cmd_update->header.id = SVGA_3D_CMD_UPDATE_GB_IMAGE; - cmd_update->header.size = sizeof(cmd_update->body); - cmd_update->body.image.sid = vfbs->surface->res.id; - cmd_update->body.image.face = 0; - cmd_update->body.image.mipmap = 0; - - box->x = clip.x1; - box->y = clip.y1; - box->z = 0; - box->w = drm_rect_width(&clip); - box->h = drm_rect_height(&clip); - box->d = 1; - - copy_size += sizeof(*cmd_update); - cmd_update++; - } - - return copy_size; -} - -static uint32_t vmw_stdu_surface_populate_copy(struct vmw_du_update_plane *update, void *cmd, uint32_t num_hits) { @@ -1299,7 +1298,7 @@ vmw_stdu_surface_populate_copy(struct vmw_du_update_plane *update, void *cmd, cmd_copy->header.id = SVGA_3D_CMD_SURFACE_COPY; cmd_copy->header.size = sizeof(cmd_copy->body) + sizeof(SVGA3dCopyBox) * num_hits; - cmd_copy->body.src.sid = vfbs->surface->res.id; + cmd_copy->body.src.sid = vmw_user_object_surface(&vfbs->uo)->res.id; cmd_copy->body.dest.sid = stdu->display_srf->res.id; return sizeof(*cmd_copy); @@ -1370,10 +1369,7 @@ static int vmw_stdu_plane_update_surface(struct vmw_private *dev_priv, srf_update.mutex = &dev_priv->cmdbuf_mutex; srf_update.intr = true; - if (vfbs->is_bo_proxy) - srf_update.post_prepare = vmw_stdu_surface_update_proxy; - - if (vfbs->surface->res.id != stdu->display_srf->res.id) { + if (vmw_user_object_surface(&vfbs->uo)->res.id != stdu->display_srf->res.id) { srf_update.calc_fifo_size = vmw_stdu_surface_fifo_size; srf_update.pre_clip = vmw_stdu_surface_populate_copy; srf_update.clip = vmw_stdu_surface_populate_clip; @@ -1417,7 +1413,7 @@ vmw_stdu_primary_plane_atomic_update(struct drm_plane *plane, stdu = vmw_crtc_to_stdu(crtc); dev_priv = vmw_priv(crtc->dev); - stdu->display_srf = vps->surf; + stdu->display_srf = vmw_user_object_surface(&vps->uo); stdu->content_fb_type = vps->content_fb_type; stdu->cpp = vps->cpp; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index e7a744dfcecf..8ae6a761c900 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright 2009-2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2009-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -36,9 +37,6 @@ #include <drm/ttm/ttm_placement.h> #define SVGA3D_FLAGS_64(upper32, lower32) (((uint64_t)upper32 << 32) | lower32) -#define SVGA3D_FLAGS_UPPER_32(svga3d_flags) (svga3d_flags >> 32) -#define SVGA3D_FLAGS_LOWER_32(svga3d_flags) \ - (svga3d_flags & ((uint64_t)U32_MAX)) /** * struct vmw_user_surface - User-space visible surface resource @@ -686,6 +684,14 @@ static void vmw_user_surface_base_release(struct ttm_base_object **p_base) struct vmw_resource *res = &user_srf->srf.res; *p_base = NULL; + + /* + * Dumb buffers own the resource and they'll unref the + * resource themselves + */ + if (res && res->guest_memory_bo && res->guest_memory_bo->is_dumb) + return; + vmw_resource_unreference(&res); } @@ -812,7 +818,8 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, } } res->guest_memory_size = cur_bo_offset; - if (metadata->scanout && + if (!file_priv->atomic && + metadata->scanout && metadata->num_sizes == 1 && metadata->sizes[0].width == VMW_CURSOR_SNOOP_WIDTH && metadata->sizes[0].height == VMW_CURSOR_SNOOP_HEIGHT && @@ -864,6 +871,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, vmw_resource_unreference(&res); goto out_unlock; } + vmw_bo_add_detached_resource(res->guest_memory_bo, res); } tmp = vmw_resource_reference(&srf->res); @@ -892,6 +900,113 @@ out_unlock: return ret; } +static struct vmw_user_surface * +vmw_lookup_user_surface_for_buffer(struct vmw_private *vmw, struct vmw_bo *bo, + u32 handle) +{ + struct vmw_user_surface *user_srf = NULL; + struct vmw_surface *surf; + struct ttm_base_object *base; + + surf = vmw_bo_surface(bo); + if (surf) { + rcu_read_lock(); + user_srf = container_of(surf, struct vmw_user_surface, srf); + base = &user_srf->prime.base; + if (base && !kref_get_unless_zero(&base->refcount)) { + drm_dbg_driver(&vmw->drm, + "%s: referencing a stale surface handle %d\n", + __func__, handle); + base = NULL; + user_srf = NULL; + } + rcu_read_unlock(); + } + + return user_srf; +} + +struct vmw_surface *vmw_lookup_surface_for_buffer(struct vmw_private *vmw, + struct vmw_bo *bo, + u32 handle) +{ + struct vmw_user_surface *user_srf = + vmw_lookup_user_surface_for_buffer(vmw, bo, handle); + struct vmw_surface *surf = NULL; + struct ttm_base_object *base; + + if (user_srf) { + surf = vmw_surface_reference(&user_srf->srf); + base = &user_srf->prime.base; + ttm_base_object_unref(&base); + } + return surf; +} + +u32 vmw_lookup_surface_handle_for_buffer(struct vmw_private *vmw, + struct vmw_bo *bo, + u32 handle) +{ + struct vmw_user_surface *user_srf = + vmw_lookup_user_surface_for_buffer(vmw, bo, handle); + int surf_handle = 0; + struct ttm_base_object *base; + + if (user_srf) { + base = &user_srf->prime.base; + surf_handle = (u32)base->handle; + ttm_base_object_unref(&base); + } + return surf_handle; +} + +static int vmw_buffer_prime_to_surface_base(struct vmw_private *dev_priv, + struct drm_file *file_priv, + u32 fd, u32 *handle, + struct ttm_base_object **base_p) +{ + struct ttm_base_object *base; + struct vmw_bo *bo; + struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; + struct vmw_user_surface *user_srf; + int ret; + + ret = drm_gem_prime_fd_to_handle(&dev_priv->drm, file_priv, fd, handle); + if (ret) { + drm_warn(&dev_priv->drm, + "Wasn't able to find user buffer for fd = %u.\n", fd); + return ret; + } + + ret = vmw_user_bo_lookup(file_priv, *handle, &bo); + if (ret) { + drm_warn(&dev_priv->drm, + "Wasn't able to lookup user buffer for handle = %u.\n", *handle); + return ret; + } + + user_srf = vmw_lookup_user_surface_for_buffer(dev_priv, bo, *handle); + if (WARN_ON(!user_srf)) { + drm_warn(&dev_priv->drm, + "User surface fd %d (handle %d) is null.\n", fd, *handle); + ret = -EINVAL; + goto out; + } + + base = &user_srf->prime.base; + ret = ttm_ref_object_add(tfile, base, NULL, false); + if (ret) { + drm_warn(&dev_priv->drm, + "Couldn't add an object ref for the buffer (%d).\n", *handle); + goto out; + } + + *base_p = base; +out: + vmw_user_bo_unref(&bo); + + return ret; +} static int vmw_surface_handle_reference(struct vmw_private *dev_priv, @@ -901,15 +1016,19 @@ vmw_surface_handle_reference(struct vmw_private *dev_priv, struct ttm_base_object **base_p) { struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; - struct vmw_user_surface *user_srf; + struct vmw_user_surface *user_srf = NULL; uint32_t handle; struct ttm_base_object *base; int ret; if (handle_type == DRM_VMW_HANDLE_PRIME) { ret = ttm_prime_fd_to_handle(tfile, u_handle, &handle); - if (unlikely(ret != 0)) - return ret; + if (ret) + return vmw_buffer_prime_to_surface_base(dev_priv, + file_priv, + u_handle, + &handle, + base_p); } else { handle = u_handle; } @@ -1503,7 +1622,12 @@ vmw_gb_surface_define_internal(struct drm_device *dev, ret = vmw_user_bo_lookup(file_priv, req->base.buffer_handle, &res->guest_memory_bo); if (ret == 0) { - if (res->guest_memory_bo->tbo.base.size < res->guest_memory_size) { + if (res->guest_memory_bo->is_dumb) { + VMW_DEBUG_USER("Can't backup surface with a dumb buffer.\n"); + vmw_user_bo_unref(&res->guest_memory_bo); + ret = -EINVAL; + goto out_unlock; + } else if (res->guest_memory_bo->tbo.base.size < res->guest_memory_size) { VMW_DEBUG_USER("Surface backup buffer too small.\n"); vmw_user_bo_unref(&res->guest_memory_bo); ret = -EINVAL; @@ -1560,6 +1684,7 @@ vmw_gb_surface_define_internal(struct drm_device *dev, rep->handle = user_srf->prime.base.handle; rep->backup_size = res->guest_memory_size; if (res->guest_memory_bo) { + vmw_bo_add_detached_resource(res->guest_memory_bo, res); rep->buffer_map_handle = drm_vma_node_offset_addr(&res->guest_memory_bo->tbo.base.vma_node); rep->buffer_size = res->guest_memory_bo->tbo.base.size; @@ -2100,3 +2225,140 @@ int vmw_gb_surface_define(struct vmw_private *dev_priv, out_unlock: return ret; } + +static SVGA3dSurfaceFormat vmw_format_bpp_to_svga(struct vmw_private *vmw, + int bpp) +{ + switch (bpp) { + case 8: /* DRM_FORMAT_C8 */ + return SVGA3D_P8; + case 16: /* DRM_FORMAT_RGB565 */ + return SVGA3D_R5G6B5; + case 32: /* DRM_FORMAT_XRGB8888 */ + if (has_sm4_context(vmw)) + return SVGA3D_B8G8R8X8_UNORM; + return SVGA3D_X8R8G8B8; + default: + drm_warn(&vmw->drm, "Unsupported format bpp: %d\n", bpp); + return SVGA3D_X8R8G8B8; + } +} + +/** + * vmw_dumb_create - Create a dumb kms buffer + * + * @file_priv: Pointer to a struct drm_file identifying the caller. + * @dev: Pointer to the drm device. + * @args: Pointer to a struct drm_mode_create_dumb structure + * Return: Zero on success, negative error code on failure. + * + * This is a driver callback for the core drm create_dumb functionality. + * Note that this is very similar to the vmw_bo_alloc ioctl, except + * that the arguments have a different format. + */ +int vmw_dumb_create(struct drm_file *file_priv, + struct drm_device *dev, + struct drm_mode_create_dumb *args) +{ + struct vmw_private *dev_priv = vmw_priv(dev); + struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; + struct vmw_bo *vbo = NULL; + struct vmw_resource *res = NULL; + union drm_vmw_gb_surface_create_ext_arg arg = { 0 }; + struct drm_vmw_gb_surface_create_ext_req *req = &arg.req; + int ret; + struct drm_vmw_size drm_size = { + .width = args->width, + .height = args->height, + .depth = 1, + }; + SVGA3dSurfaceFormat format = vmw_format_bpp_to_svga(dev_priv, args->bpp); + const struct SVGA3dSurfaceDesc *desc = vmw_surface_get_desc(format); + SVGA3dSurfaceAllFlags flags = SVGA3D_SURFACE_HINT_TEXTURE | + SVGA3D_SURFACE_HINT_RENDERTARGET | + SVGA3D_SURFACE_SCREENTARGET | + SVGA3D_SURFACE_BIND_SHADER_RESOURCE | + SVGA3D_SURFACE_BIND_RENDER_TARGET; + + /* + * Without mob support we're just going to use raw memory buffer + * because we wouldn't be able to support full surface coherency + * without mobs + */ + if (!dev_priv->has_mob) { + int cpp = DIV_ROUND_UP(args->bpp, 8); + + switch (cpp) { + case 1: /* DRM_FORMAT_C8 */ + case 2: /* DRM_FORMAT_RGB565 */ + case 4: /* DRM_FORMAT_XRGB8888 */ + break; + default: + /* + * Dumb buffers don't allow anything else. + * This is tested via IGT's dumb_buffers + */ + return -EINVAL; + } + + args->pitch = args->width * cpp; + args->size = ALIGN(args->pitch * args->height, PAGE_SIZE); + + ret = vmw_gem_object_create_with_handle(dev_priv, file_priv, + args->size, &args->handle, + &vbo); + /* drop reference from allocate - handle holds it now */ + drm_gem_object_put(&vbo->tbo.base); + return ret; + } + + req->version = drm_vmw_gb_surface_v1; + req->multisample_pattern = SVGA3D_MS_PATTERN_NONE; + req->quality_level = SVGA3D_MS_QUALITY_NONE; + req->buffer_byte_stride = 0; + req->must_be_zero = 0; + req->base.svga3d_flags = SVGA3D_FLAGS_LOWER_32(flags); + req->svga3d_flags_upper_32_bits = SVGA3D_FLAGS_UPPER_32(flags); + req->base.format = (uint32_t)format; + req->base.drm_surface_flags = drm_vmw_surface_flag_scanout; + req->base.drm_surface_flags |= drm_vmw_surface_flag_shareable; + req->base.drm_surface_flags |= drm_vmw_surface_flag_create_buffer; + req->base.drm_surface_flags |= drm_vmw_surface_flag_coherent; + req->base.base_size.width = args->width; + req->base.base_size.height = args->height; + req->base.base_size.depth = 1; + req->base.array_size = 0; + req->base.mip_levels = 1; + req->base.multisample_count = 0; + req->base.buffer_handle = SVGA3D_INVALID_ID; + req->base.autogen_filter = SVGA3D_TEX_FILTER_NONE; + ret = vmw_gb_surface_define_ext_ioctl(dev, &arg, file_priv); + if (ret) { + drm_warn(dev, "Unable to create a dumb buffer\n"); + return ret; + } + + args->handle = arg.rep.buffer_handle; + args->size = arg.rep.buffer_size; + args->pitch = vmw_surface_calculate_pitch(desc, &drm_size); + + ret = vmw_user_resource_lookup_handle(dev_priv, tfile, arg.rep.handle, + user_surface_converter, + &res); + if (ret) { + drm_err(dev, "Created resource handle doesn't exist!\n"); + goto err; + } + + vbo = res->guest_memory_bo; + vbo->is_dumb = true; + vbo->dumb_surface = vmw_res_to_srf(res); + +err: + if (res) + vmw_resource_unreference(&res); + if (ret) + ttm_ref_object_base_unref(tfile, arg.rep.handle); + + return ret; +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_vkms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_vkms.c index 3bfcf671fcd5..8651b788e98b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_vkms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_vkms.c @@ -75,7 +75,7 @@ done: return ret; } -static int +static void compute_crc(struct drm_crtc *crtc, struct vmw_surface *surf, u32 *crc) @@ -101,8 +101,6 @@ compute_crc(struct drm_crtc *crtc, } vmw_bo_unmap(bo); - - return 0; } static void @@ -116,7 +114,6 @@ crc_generate_worker(struct work_struct *work) u64 frame_start, frame_end; u32 crc32 = 0; struct vmw_surface *surf = 0; - int ret; spin_lock_irq(&du->vkms.crc_state_lock); crc_pending = du->vkms.crc_pending; @@ -130,22 +127,24 @@ crc_generate_worker(struct work_struct *work) return; spin_lock_irq(&du->vkms.crc_state_lock); - surf = du->vkms.surface; + surf = vmw_surface_reference(du->vkms.surface); spin_unlock_irq(&du->vkms.crc_state_lock); - if (vmw_surface_sync(vmw, surf)) { - drm_warn(crtc->dev, "CRC worker wasn't able to sync the crc surface!\n"); - return; - } + if (surf) { + if (vmw_surface_sync(vmw, surf)) { + drm_warn( + crtc->dev, + "CRC worker wasn't able to sync the crc surface!\n"); + return; + } - ret = compute_crc(crtc, surf, &crc32); - if (ret) - return; + compute_crc(crtc, surf, &crc32); + vmw_surface_unreference(&surf); + } spin_lock_irq(&du->vkms.crc_state_lock); frame_start = du->vkms.frame_start; frame_end = du->vkms.frame_end; - crc_pending = du->vkms.crc_pending; du->vkms.frame_start = 0; du->vkms.frame_end = 0; du->vkms.crc_pending = false; @@ -164,7 +163,7 @@ vmw_vkms_vblank_simulate(struct hrtimer *timer) struct vmw_display_unit *du = container_of(timer, struct vmw_display_unit, vkms.timer); struct drm_crtc *crtc = &du->crtc; struct vmw_private *vmw = vmw_priv(crtc->dev); - struct vmw_surface *surf = NULL; + bool has_surface = false; u64 ret_overrun; bool locked, ret; @@ -179,10 +178,10 @@ vmw_vkms_vblank_simulate(struct hrtimer *timer) WARN_ON(!ret); if (!locked) return HRTIMER_RESTART; - surf = du->vkms.surface; + has_surface = du->vkms.surface != NULL; vmw_vkms_unlock(crtc); - if (du->vkms.crc_enabled && surf) { + if (du->vkms.crc_enabled && has_surface) { u64 frame = drm_crtc_accurate_vblank_count(crtc); spin_lock(&du->vkms.crc_state_lock); @@ -336,6 +335,8 @@ vmw_vkms_crtc_cleanup(struct drm_crtc *crtc) { struct vmw_display_unit *du = vmw_crtc_to_du(crtc); + if (du->vkms.surface) + vmw_surface_unreference(&du->vkms.surface); WARN_ON(work_pending(&du->vkms.crc_generator_work)); hrtimer_cancel(&du->vkms.timer); } @@ -497,9 +498,12 @@ vmw_vkms_set_crc_surface(struct drm_crtc *crtc, struct vmw_display_unit *du = vmw_crtc_to_du(crtc); struct vmw_private *vmw = vmw_priv(crtc->dev); - if (vmw->vkms_enabled) { + if (vmw->vkms_enabled && du->vkms.surface != surf) { WARN_ON(atomic_read(&du->vkms.atomic_lock) != VMW_VKMS_LOCK_MODESET); - du->vkms.surface = surf; + if (du->vkms.surface) + vmw_surface_unreference(&du->vkms.surface); + if (surf) + du->vkms.surface = vmw_surface_reference(surf); } } diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_client.c b/drivers/hid/amd-sfh-hid/amd_sfh_client.c index bdb578e0899f..4b59687ff5d8 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_client.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_client.c @@ -288,12 +288,22 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata) mp2_ops->start(privdata, info); cl_data->sensor_sts[i] = amd_sfh_wait_for_response (privdata, cl_data->sensor_idx[i], SENSOR_ENABLED); + + if (cl_data->sensor_sts[i] == SENSOR_ENABLED) + cl_data->is_any_sensor_enabled = true; + } + + if (!cl_data->is_any_sensor_enabled || + (mp2_ops->discovery_status && mp2_ops->discovery_status(privdata) == 0)) { + dev_warn(dev, "Failed to discover, sensors not enabled is %d\n", + cl_data->is_any_sensor_enabled); + rc = -EOPNOTSUPP; + goto cleanup; } for (i = 0; i < cl_data->num_hid_devices; i++) { cl_data->cur_hid_dev = i; if (cl_data->sensor_sts[i] == SENSOR_ENABLED) { - cl_data->is_any_sensor_enabled = true; rc = amdtp_hid_probe(i, cl_data); if (rc) goto cleanup; @@ -305,12 +315,6 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata) cl_data->sensor_sts[i]); } - if (!cl_data->is_any_sensor_enabled || - (mp2_ops->discovery_status && mp2_ops->discovery_status(privdata) == 0)) { - dev_warn(dev, "Failed to discover, sensors not enabled is %d\n", cl_data->is_any_sensor_enabled); - rc = -EOPNOTSUPP; - goto cleanup; - } schedule_delayed_work(&cl_data->work_buffer, msecs_to_jiffies(AMD_SFH_IDLE_LOOP)); return 0; diff --git a/drivers/hid/bpf/Kconfig b/drivers/hid/bpf/Kconfig index 83214bae6768..d65482e02a6c 100644 --- a/drivers/hid/bpf/Kconfig +++ b/drivers/hid/bpf/Kconfig @@ -3,7 +3,7 @@ menu "HID-BPF support" config HID_BPF bool "HID-BPF support" - depends on BPF + depends on BPF_JIT depends on BPF_SYSCALL depends on DYNAMIC_FTRACE_WITH_DIRECT_CALLS help diff --git a/drivers/hid/bpf/hid_bpf_struct_ops.c b/drivers/hid/bpf/hid_bpf_struct_ops.c index f59cce6e437f..cd696c59ba0f 100644 --- a/drivers/hid/bpf/hid_bpf_struct_ops.c +++ b/drivers/hid/bpf/hid_bpf_struct_ops.c @@ -183,6 +183,10 @@ static int hid_bpf_reg(void *kdata, struct bpf_link *link) struct hid_device *hdev; int count, err = 0; + /* prevent multiple attach of the same struct_ops */ + if (ops->hdev) + return -EINVAL; + hdev = hid_get_device(ops->hid_id); if (IS_ERR(hdev)) return PTR_ERR(hdev); @@ -248,6 +252,7 @@ static void hid_bpf_unreg(void *kdata, struct bpf_link *link) list_del_rcu(&ops->list); synchronize_srcu(&hdev->bpf.srcu); + ops->hdev = NULL; reconnect = hdev->bpf.rdesc_ops == ops; if (reconnect) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index a44367aef621..1f4564982b95 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -692,78 +692,28 @@ static bool wacom_is_art_pen(int tool_id) static int wacom_intuos_get_tool_type(int tool_id) { - int tool_type = BTN_TOOL_PEN; - - if (wacom_is_art_pen(tool_id)) - return tool_type; - switch (tool_id) { case 0x812: /* Inking pen */ case 0x801: /* Intuos3 Inking pen */ case 0x12802: /* Intuos4/5 Inking Pen */ case 0x012: - tool_type = BTN_TOOL_PENCIL; - break; - - case 0x822: /* Pen */ - case 0x842: - case 0x852: - case 0x823: /* Intuos3 Grip Pen */ - case 0x813: /* Intuos3 Classic Pen */ - case 0x802: /* Intuos4/5 13HD/24HD General Pen */ - case 0x8e2: /* IntuosHT2 pen */ - case 0x022: - case 0x200: /* Pro Pen 3 */ - case 0x04200: /* Pro Pen 3 */ - case 0x10842: /* MobileStudio Pro Pro Pen slim */ - case 0x14802: /* Intuos4/5 13HD/24HD Classic Pen */ - case 0x16802: /* Cintiq 13HD Pro Pen */ - case 0x18802: /* DTH2242 Pen */ - case 0x10802: /* Intuos4/5 13HD/24HD General Pen */ - case 0x80842: /* Intuos Pro and Cintiq Pro 3D Pen */ - tool_type = BTN_TOOL_PEN; - break; + return BTN_TOOL_PENCIL; case 0x832: /* Stroke pen */ case 0x032: - tool_type = BTN_TOOL_BRUSH; - break; + return BTN_TOOL_BRUSH; case 0x007: /* Mouse 4D and 2D */ case 0x09c: case 0x094: case 0x017: /* Intuos3 2D Mouse */ case 0x806: /* Intuos4 Mouse */ - tool_type = BTN_TOOL_MOUSE; - break; + return BTN_TOOL_MOUSE; case 0x096: /* Lens cursor */ case 0x097: /* Intuos3 Lens cursor */ case 0x006: /* Intuos4 Lens cursor */ - tool_type = BTN_TOOL_LENS; - break; - - case 0x82a: /* Eraser */ - case 0x84a: - case 0x85a: - case 0x91a: - case 0xd1a: - case 0x0fa: - case 0x82b: /* Intuos3 Grip Pen Eraser */ - case 0x81b: /* Intuos3 Classic Pen Eraser */ - case 0x91b: /* Intuos3 Airbrush Eraser */ - case 0x80c: /* Intuos4/5 13HD/24HD Marker Pen Eraser */ - case 0x80a: /* Intuos4/5 13HD/24HD General Pen Eraser */ - case 0x90a: /* Intuos4/5 13HD/24HD Airbrush Eraser */ - case 0x1480a: /* Intuos4/5 13HD/24HD Classic Pen Eraser */ - case 0x1090a: /* Intuos4/5 13HD/24HD Airbrush Eraser */ - case 0x1080c: /* Intuos4/5 13HD/24HD Art Pen Eraser */ - case 0x1084a: /* MobileStudio Pro Pro Pen slim Eraser */ - case 0x1680a: /* Cintiq 13HD Pro Pen Eraser */ - case 0x1880a: /* DTH2242 Eraser */ - case 0x1080a: /* Intuos4/5 13HD/24HD General Pen Eraser */ - tool_type = BTN_TOOL_RUBBER; - break; + return BTN_TOOL_LENS; case 0xd12: case 0x912: @@ -771,10 +721,13 @@ static int wacom_intuos_get_tool_type(int tool_id) case 0x913: /* Intuos3 Airbrush */ case 0x902: /* Intuos4/5 13HD/24HD Airbrush */ case 0x10902: /* Intuos4/5 13HD/24HD Airbrush */ - tool_type = BTN_TOOL_AIRBRUSH; - break; + return BTN_TOOL_AIRBRUSH; + + default: + if (tool_id & 0x0008) + return BTN_TOOL_RUBBER; + return BTN_TOOL_PEN; } - return tool_type; } static void wacom_exit_report(struct wacom_wac *wacom) diff --git a/drivers/input/input-mt.c b/drivers/input/input-mt.c index 14b53dac1253..6b04a674f832 100644 --- a/drivers/input/input-mt.c +++ b/drivers/input/input-mt.c @@ -46,6 +46,9 @@ int input_mt_init_slots(struct input_dev *dev, unsigned int num_slots, return 0; if (mt) return mt->num_slots != num_slots ? -EINVAL : 0; + /* Arbitrary limit for avoiding too large memory allocation. */ + if (num_slots > 1024) + return -EINVAL; mt = kzalloc(struct_size(mt, slots, num_slots), GFP_KERNEL); if (!mt) diff --git a/drivers/irqchip/irq-loongarch-cpu.c b/drivers/irqchip/irq-loongarch-cpu.c index 9d8f2c406043..b35903a06902 100644 --- a/drivers/irqchip/irq-loongarch-cpu.c +++ b/drivers/irqchip/irq-loongarch-cpu.c @@ -18,11 +18,13 @@ struct fwnode_handle *cpuintc_handle; static u32 lpic_gsi_to_irq(u32 gsi) { + int irq = 0; + /* Only pch irqdomain transferring is required for LoongArch. */ if (gsi >= GSI_MIN_PCH_IRQ && gsi <= GSI_MAX_PCH_IRQ) - return acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_HIGH); + irq = acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_HIGH); - return 0; + return (irq > 0) ? irq : 0; } static struct fwnode_handle *lpic_get_gsi_domain_id(u32 gsi) diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c index 093fd42893a7..53cc08387588 100644 --- a/drivers/irqchip/irq-mbigen.c +++ b/drivers/irqchip/irq-mbigen.c @@ -64,6 +64,20 @@ struct mbigen_device { void __iomem *base; }; +static inline unsigned int get_mbigen_node_offset(unsigned int nid) +{ + unsigned int offset = nid * MBIGEN_NODE_OFFSET; + + /* + * To avoid touched clear register in unexpected way, we need to directly + * skip clear register when access to more than 10 mbigen nodes. + */ + if (nid >= (REG_MBIGEN_CLEAR_OFFSET / MBIGEN_NODE_OFFSET)) + offset += MBIGEN_NODE_OFFSET; + + return offset; +} + static inline unsigned int get_mbigen_vec_reg(irq_hw_number_t hwirq) { unsigned int nid, pin; @@ -72,8 +86,7 @@ static inline unsigned int get_mbigen_vec_reg(irq_hw_number_t hwirq) nid = hwirq / IRQS_PER_MBIGEN_NODE + 1; pin = hwirq % IRQS_PER_MBIGEN_NODE; - return pin * 4 + nid * MBIGEN_NODE_OFFSET - + REG_MBIGEN_VEC_OFFSET; + return pin * 4 + get_mbigen_node_offset(nid) + REG_MBIGEN_VEC_OFFSET; } static inline void get_mbigen_type_reg(irq_hw_number_t hwirq, @@ -88,8 +101,7 @@ static inline void get_mbigen_type_reg(irq_hw_number_t hwirq, *mask = 1 << (irq_ofst % 32); ofst = irq_ofst / 32 * 4; - *addr = ofst + nid * MBIGEN_NODE_OFFSET - + REG_MBIGEN_TYPE_OFFSET; + *addr = ofst + get_mbigen_node_offset(nid) + REG_MBIGEN_TYPE_OFFSET; } static inline void get_mbigen_clear_reg(irq_hw_number_t hwirq, diff --git a/drivers/irqchip/irq-meson-gpio.c b/drivers/irqchip/irq-meson-gpio.c index 27e30ce41db3..cd789fa51519 100644 --- a/drivers/irqchip/irq-meson-gpio.c +++ b/drivers/irqchip/irq-meson-gpio.c @@ -178,7 +178,7 @@ struct meson_gpio_irq_controller { void __iomem *base; u32 channel_irqs[MAX_NUM_CHANNEL]; DECLARE_BITMAP(channel_map, MAX_NUM_CHANNEL); - spinlock_t lock; + raw_spinlock_t lock; }; static void meson_gpio_irq_update_bits(struct meson_gpio_irq_controller *ctl, @@ -187,14 +187,14 @@ static void meson_gpio_irq_update_bits(struct meson_gpio_irq_controller *ctl, unsigned long flags; u32 tmp; - spin_lock_irqsave(&ctl->lock, flags); + raw_spin_lock_irqsave(&ctl->lock, flags); tmp = readl_relaxed(ctl->base + reg); tmp &= ~mask; tmp |= val; writel_relaxed(tmp, ctl->base + reg); - spin_unlock_irqrestore(&ctl->lock, flags); + raw_spin_unlock_irqrestore(&ctl->lock, flags); } static void meson_gpio_irq_init_dummy(struct meson_gpio_irq_controller *ctl) @@ -244,12 +244,12 @@ meson_gpio_irq_request_channel(struct meson_gpio_irq_controller *ctl, unsigned long flags; unsigned int idx; - spin_lock_irqsave(&ctl->lock, flags); + raw_spin_lock_irqsave(&ctl->lock, flags); /* Find a free channel */ idx = find_first_zero_bit(ctl->channel_map, ctl->params->nr_channels); if (idx >= ctl->params->nr_channels) { - spin_unlock_irqrestore(&ctl->lock, flags); + raw_spin_unlock_irqrestore(&ctl->lock, flags); pr_err("No channel available\n"); return -ENOSPC; } @@ -257,7 +257,7 @@ meson_gpio_irq_request_channel(struct meson_gpio_irq_controller *ctl, /* Mark the channel as used */ set_bit(idx, ctl->channel_map); - spin_unlock_irqrestore(&ctl->lock, flags); + raw_spin_unlock_irqrestore(&ctl->lock, flags); /* * Setup the mux of the channel to route the signal of the pad @@ -567,7 +567,7 @@ static int meson_gpio_irq_of_init(struct device_node *node, struct device_node * if (!ctl) return -ENOMEM; - spin_lock_init(&ctl->lock); + raw_spin_lock_init(&ctl->lock); ctl->base = of_iomap(node, 0); if (!ctl->base) { diff --git a/drivers/irqchip/irq-pic32-evic.c b/drivers/irqchip/irq-pic32-evic.c index 5d6b8e025bb8..eb6ca516a166 100644 --- a/drivers/irqchip/irq-pic32-evic.c +++ b/drivers/irqchip/irq-pic32-evic.c @@ -161,9 +161,9 @@ static int pic32_irq_domain_map(struct irq_domain *d, unsigned int virq, return ret; } -int pic32_irq_domain_xlate(struct irq_domain *d, struct device_node *ctrlr, - const u32 *intspec, unsigned int intsize, - irq_hw_number_t *out_hwirq, unsigned int *out_type) +static int pic32_irq_domain_xlate(struct irq_domain *d, struct device_node *ctrlr, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_type) { struct evic_chip_data *priv = d->host_data; diff --git a/drivers/media/pci/intel/ipu6/Kconfig b/drivers/media/pci/intel/ipu6/Kconfig index 154343080c82..40e20f0aa5ae 100644 --- a/drivers/media/pci/intel/ipu6/Kconfig +++ b/drivers/media/pci/intel/ipu6/Kconfig @@ -3,13 +3,14 @@ config VIDEO_INTEL_IPU6 depends on ACPI || COMPILE_TEST depends on VIDEO_DEV depends on X86 && X86_64 && HAS_DMA + depends on IPU_BRIDGE || !IPU_BRIDGE + select AUXILIARY_BUS select DMA_OPS select IOMMU_IOVA select VIDEO_V4L2_SUBDEV_API select MEDIA_CONTROLLER select VIDEOBUF2_DMA_CONTIG select V4L2_FWNODE - select IPU_BRIDGE help This is the 6th Gen Intel Image Processing Unit, found in Intel SoCs and used for capturing images and video from camera sensors. diff --git a/drivers/media/usb/uvc/uvc_ctrl.c b/drivers/media/usb/uvc/uvc_ctrl.c index 0136df5732ba..4fe26e82e3d1 100644 --- a/drivers/media/usb/uvc/uvc_ctrl.c +++ b/drivers/media/usb/uvc/uvc_ctrl.c @@ -2680,6 +2680,10 @@ static void uvc_ctrl_init_ctrl(struct uvc_video_chain *chain, for (i = 0; i < ARRAY_SIZE(uvc_ctrl_mappings); ++i) { const struct uvc_control_mapping *mapping = &uvc_ctrl_mappings[i]; + if (!uvc_entity_match_guid(ctrl->entity, mapping->entity) || + ctrl->info.selector != mapping->selector) + continue; + /* Let the device provide a custom mapping. */ if (mapping->filter_mapping) { mapping = mapping->filter_mapping(chain, ctrl); @@ -2687,9 +2691,7 @@ static void uvc_ctrl_init_ctrl(struct uvc_video_chain *chain, continue; } - if (uvc_entity_match_guid(ctrl->entity, mapping->entity) && - ctrl->info.selector == mapping->selector) - __uvc_ctrl_add_mapping(chain, ctrl, mapping); + __uvc_ctrl_add_mapping(chain, ctrl, mapping); } } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index ffa74c26ee53..23f74c6c88b9 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7649,8 +7649,8 @@ static int bnxt_get_avail_msix(struct bnxt *bp, int num); static int __bnxt_reserve_rings(struct bnxt *bp) { struct bnxt_hw_rings hwr = {0}; + int rx_rings, old_rx_rings, rc; int cp = bp->cp_nr_rings; - int rx_rings, rc; int ulp_msix = 0; bool sh = false; int tx_cp; @@ -7684,6 +7684,7 @@ static int __bnxt_reserve_rings(struct bnxt *bp) hwr.grp = bp->rx_nr_rings; hwr.rss_ctx = bnxt_get_total_rss_ctxs(bp, &hwr); hwr.stat = bnxt_get_func_stat_ctxs(bp); + old_rx_rings = bp->hw_resc.resv_rx_rings; rc = bnxt_hwrm_reserve_rings(bp, &hwr); if (rc) @@ -7738,7 +7739,8 @@ static int __bnxt_reserve_rings(struct bnxt *bp) if (!bnxt_rings_ok(bp, &hwr)) return -ENOMEM; - if (!netif_is_rxfh_configured(bp->dev)) + if (old_rx_rings != bp->hw_resc.resv_rx_rings && + !netif_is_rxfh_configured(bp->dev)) bnxt_set_dflt_rss_indir_tbl(bp, NULL); if (!bnxt_ulp_registered(bp->edev) && BNXT_NEW_RM(bp)) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index d00ef0063820..ab8e3f197e7b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1863,8 +1863,14 @@ static void bnxt_modify_rss(struct bnxt *bp, struct ethtool_rxfh_context *ctx, } static int bnxt_rxfh_context_check(struct bnxt *bp, + const struct ethtool_rxfh_param *rxfh, struct netlink_ext_ack *extack) { + if (rxfh->hfunc && rxfh->hfunc != ETH_RSS_HASH_TOP) { + NL_SET_ERR_MSG_MOD(extack, "RSS hash function not supported"); + return -EOPNOTSUPP; + } + if (!BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) { NL_SET_ERR_MSG_MOD(extack, "RSS contexts not supported"); return -EOPNOTSUPP; @@ -1888,7 +1894,7 @@ static int bnxt_create_rxfh_context(struct net_device *dev, struct bnxt_vnic_info *vnic; int rc; - rc = bnxt_rxfh_context_check(bp, extack); + rc = bnxt_rxfh_context_check(bp, rxfh, extack); if (rc) return rc; @@ -1915,8 +1921,12 @@ static int bnxt_create_rxfh_context(struct net_device *dev, if (rc) goto out; + /* Populate defaults in the context */ bnxt_set_dflt_rss_indir_tbl(bp, ctx); + ctx->hfunc = ETH_RSS_HASH_TOP; memcpy(vnic->rss_hash_key, bp->rss_hash_key, HW_HASH_KEY_SIZE); + memcpy(ethtool_rxfh_context_key(ctx), + bp->rss_hash_key, HW_HASH_KEY_SIZE); rc = bnxt_hwrm_vnic_alloc(bp, vnic, 0, bp->rx_nr_rings); if (rc) { @@ -1953,7 +1963,7 @@ static int bnxt_modify_rxfh_context(struct net_device *dev, struct bnxt_rss_ctx *rss_ctx; int rc; - rc = bnxt_rxfh_context_check(bp, extack); + rc = bnxt_rxfh_context_check(bp, rxfh, extack); if (rc) return rc; diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 99a75a59078e..caaa10157909 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -765,18 +765,17 @@ static inline struct xsk_buff_pool *ice_get_xp_from_qid(struct ice_vsi *vsi, } /** - * ice_xsk_pool - get XSK buffer pool bound to a ring + * ice_rx_xsk_pool - assign XSK buff pool to Rx ring * @ring: Rx ring to use * - * Returns a pointer to xsk_buff_pool structure if there is a buffer pool - * present, NULL otherwise. + * Sets XSK buff pool pointer on Rx ring. */ -static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring) +static inline void ice_rx_xsk_pool(struct ice_rx_ring *ring) { struct ice_vsi *vsi = ring->vsi; u16 qid = ring->q_index; - return ice_get_xp_from_qid(vsi, qid); + WRITE_ONCE(ring->xsk_pool, ice_get_xp_from_qid(vsi, qid)); } /** @@ -801,7 +800,7 @@ static inline void ice_tx_xsk_pool(struct ice_vsi *vsi, u16 qid) if (!ring) return; - ring->xsk_pool = ice_get_xp_from_qid(vsi, qid); + WRITE_ONCE(ring->xsk_pool, ice_get_xp_from_qid(vsi, qid)); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 5d396c1a7731..1facf179a96f 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -536,7 +536,7 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) return err; } - ring->xsk_pool = ice_xsk_pool(ring); + ice_rx_xsk_pool(ring); if (ring->xsk_pool) { xdp_rxq_info_unreg(&ring->xdp_rxq); @@ -597,7 +597,7 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) return 0; } - ok = ice_alloc_rx_bufs_zc(ring, num_bufs); + ok = ice_alloc_rx_bufs_zc(ring, ring->xsk_pool, num_bufs); if (!ok) { u16 pf_q = ring->vsi->rxq_map[ring->q_index]; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index ec636be4d17d..3de020020bc4 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2948,7 +2948,7 @@ static void ice_vsi_rx_napi_schedule(struct ice_vsi *vsi) ice_for_each_rxq(vsi, i) { struct ice_rx_ring *rx_ring = vsi->rx_rings[i]; - if (rx_ring->xsk_pool) + if (READ_ONCE(rx_ring->xsk_pool)) napi_schedule(&rx_ring->q_vector->napi); } } diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 8bb743f78fcb..8d25b6981269 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -456,7 +456,7 @@ void ice_free_rx_ring(struct ice_rx_ring *rx_ring) if (rx_ring->vsi->type == ICE_VSI_PF) if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) xdp_rxq_info_unreg(&rx_ring->xdp_rxq); - rx_ring->xdp_prog = NULL; + WRITE_ONCE(rx_ring->xdp_prog, NULL); if (rx_ring->xsk_pool) { kfree(rx_ring->xdp_buf); rx_ring->xdp_buf = NULL; @@ -1521,10 +1521,11 @@ int ice_napi_poll(struct napi_struct *napi, int budget) * budget and be more aggressive about cleaning up the Tx descriptors. */ ice_for_each_tx_ring(tx_ring, q_vector->tx) { + struct xsk_buff_pool *xsk_pool = READ_ONCE(tx_ring->xsk_pool); bool wd; - if (tx_ring->xsk_pool) - wd = ice_xmit_zc(tx_ring); + if (xsk_pool) + wd = ice_xmit_zc(tx_ring, xsk_pool); else if (ice_ring_is_xdp(tx_ring)) wd = true; else @@ -1550,6 +1551,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget) budget_per_ring = budget; ice_for_each_rx_ring(rx_ring, q_vector->rx) { + struct xsk_buff_pool *xsk_pool = READ_ONCE(rx_ring->xsk_pool); int cleaned; /* A dedicated path for zero-copy allows making a single @@ -1557,7 +1559,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget) * ice_clean_rx_irq function and makes the codebase cleaner. */ cleaned = rx_ring->xsk_pool ? - ice_clean_rx_irq_zc(rx_ring, budget_per_ring) : + ice_clean_rx_irq_zc(rx_ring, xsk_pool, budget_per_ring) : ice_clean_rx_irq(rx_ring, budget_per_ring); work_done += cleaned; /* if we clean as many as budgeted, we must not be done */ diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index a65955eb23c0..240a7bec242b 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -52,10 +52,8 @@ static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx) static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx) { ice_clean_tx_ring(vsi->tx_rings[q_idx]); - if (ice_is_xdp_ena_vsi(vsi)) { - synchronize_rcu(); + if (ice_is_xdp_ena_vsi(vsi)) ice_clean_tx_ring(vsi->xdp_rings[q_idx]); - } ice_clean_rx_ring(vsi->rx_rings[q_idx]); } @@ -112,25 +110,29 @@ ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_rx_ring *rx_ring, * ice_qvec_cfg_msix - Enable IRQ for given queue vector * @vsi: the VSI that contains queue vector * @q_vector: queue vector + * @qid: queue index */ static void -ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector) +ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector, u16 qid) { u16 reg_idx = q_vector->reg_idx; struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; - struct ice_tx_ring *tx_ring; - struct ice_rx_ring *rx_ring; + int q, _qid = qid; ice_cfg_itr(hw, q_vector); - ice_for_each_tx_ring(tx_ring, q_vector->tx) - ice_cfg_txq_interrupt(vsi, tx_ring->reg_idx, reg_idx, - q_vector->tx.itr_idx); + for (q = 0; q < q_vector->num_ring_tx; q++) { + ice_cfg_txq_interrupt(vsi, _qid, reg_idx, q_vector->tx.itr_idx); + _qid++; + } + + _qid = qid; - ice_for_each_rx_ring(rx_ring, q_vector->rx) - ice_cfg_rxq_interrupt(vsi, rx_ring->reg_idx, reg_idx, - q_vector->rx.itr_idx); + for (q = 0; q < q_vector->num_ring_rx; q++) { + ice_cfg_rxq_interrupt(vsi, _qid, reg_idx, q_vector->rx.itr_idx); + _qid++; + } ice_flush(hw); } @@ -164,6 +166,7 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) struct ice_tx_ring *tx_ring; struct ice_rx_ring *rx_ring; int timeout = 50; + int fail = 0; int err; if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq) @@ -180,15 +183,17 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) usleep_range(1000, 2000); } + synchronize_net(); + netif_carrier_off(vsi->netdev); + netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); + ice_qvec_dis_irq(vsi, rx_ring, q_vector); ice_qvec_toggle_napi(vsi, q_vector, false); - netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); - ice_fill_txq_meta(vsi, tx_ring, &txq_meta); err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, tx_ring, &txq_meta); - if (err) - return err; + if (!fail) + fail = err; if (ice_is_xdp_ena_vsi(vsi)) { struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx]; @@ -196,17 +201,15 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) ice_fill_txq_meta(vsi, xdp_ring, &txq_meta); err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, xdp_ring, &txq_meta); - if (err) - return err; + if (!fail) + fail = err; } - err = ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, true); - if (err) - return err; + ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, false); ice_qp_clean_rings(vsi, q_idx); ice_qp_reset_stats(vsi, q_idx); - return 0; + return fail; } /** @@ -219,40 +222,48 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) { struct ice_q_vector *q_vector; + int fail = 0; + bool link_up; int err; err = ice_vsi_cfg_single_txq(vsi, vsi->tx_rings, q_idx); - if (err) - return err; + if (!fail) + fail = err; if (ice_is_xdp_ena_vsi(vsi)) { struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx]; err = ice_vsi_cfg_single_txq(vsi, vsi->xdp_rings, q_idx); - if (err) - return err; + if (!fail) + fail = err; ice_set_ring_xdp(xdp_ring); ice_tx_xsk_pool(vsi, q_idx); } err = ice_vsi_cfg_single_rxq(vsi, q_idx); - if (err) - return err; + if (!fail) + fail = err; q_vector = vsi->rx_rings[q_idx]->q_vector; - ice_qvec_cfg_msix(vsi, q_vector); + ice_qvec_cfg_msix(vsi, q_vector, q_idx); err = ice_vsi_ctrl_one_rx_ring(vsi, true, q_idx, true); - if (err) - return err; + if (!fail) + fail = err; ice_qvec_toggle_napi(vsi, q_vector, true); ice_qvec_ena_irq(vsi, q_vector); - netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); + /* make sure NAPI sees updated ice_{t,x}_ring::xsk_pool */ + synchronize_net(); + ice_get_link_status(vsi->port_info, &link_up); + if (link_up) { + netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); + netif_carrier_on(vsi->netdev); + } clear_bit(ICE_CFG_BUSY, vsi->state); - return 0; + return fail; } /** @@ -459,6 +470,7 @@ static u16 ice_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp, /** * __ice_alloc_rx_bufs_zc - allocate a number of Rx buffers * @rx_ring: Rx ring + * @xsk_pool: XSK buffer pool to pick buffers to be filled by HW * @count: The number of buffers to allocate * * Place the @count of descriptors onto Rx ring. Handle the ring wrap @@ -467,7 +479,8 @@ static u16 ice_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp, * * Returns true if all allocations were successful, false if any fail. */ -static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) +static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, + struct xsk_buff_pool *xsk_pool, u16 count) { u32 nb_buffs_extra = 0, nb_buffs = 0; union ice_32b_rx_flex_desc *rx_desc; @@ -479,8 +492,7 @@ static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) xdp = ice_xdp_buf(rx_ring, ntu); if (ntu + count >= rx_ring->count) { - nb_buffs_extra = ice_fill_rx_descs(rx_ring->xsk_pool, xdp, - rx_desc, + nb_buffs_extra = ice_fill_rx_descs(xsk_pool, xdp, rx_desc, rx_ring->count - ntu); if (nb_buffs_extra != rx_ring->count - ntu) { ntu += nb_buffs_extra; @@ -493,7 +505,7 @@ static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) ice_release_rx_desc(rx_ring, 0); } - nb_buffs = ice_fill_rx_descs(rx_ring->xsk_pool, xdp, rx_desc, count); + nb_buffs = ice_fill_rx_descs(xsk_pool, xdp, rx_desc, count); ntu += nb_buffs; if (ntu == rx_ring->count) @@ -509,6 +521,7 @@ exit: /** * ice_alloc_rx_bufs_zc - allocate a number of Rx buffers * @rx_ring: Rx ring + * @xsk_pool: XSK buffer pool to pick buffers to be filled by HW * @count: The number of buffers to allocate * * Wrapper for internal allocation routine; figure out how many tail @@ -516,7 +529,8 @@ exit: * * Returns true if all calls to internal alloc routine succeeded */ -bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) +bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, + struct xsk_buff_pool *xsk_pool, u16 count) { u16 rx_thresh = ICE_RING_QUARTER(rx_ring); u16 leftover, i, tail_bumps; @@ -525,9 +539,9 @@ bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) leftover = count - (tail_bumps * rx_thresh); for (i = 0; i < tail_bumps; i++) - if (!__ice_alloc_rx_bufs_zc(rx_ring, rx_thresh)) + if (!__ice_alloc_rx_bufs_zc(rx_ring, xsk_pool, rx_thresh)) return false; - return __ice_alloc_rx_bufs_zc(rx_ring, leftover); + return __ice_alloc_rx_bufs_zc(rx_ring, xsk_pool, leftover); } /** @@ -596,8 +610,10 @@ out: /** * ice_clean_xdp_irq_zc - produce AF_XDP descriptors to CQ * @xdp_ring: XDP Tx ring + * @xsk_pool: AF_XDP buffer pool pointer */ -static u32 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring) +static u32 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring, + struct xsk_buff_pool *xsk_pool) { u16 ntc = xdp_ring->next_to_clean; struct ice_tx_desc *tx_desc; @@ -648,7 +664,7 @@ skip: if (xdp_ring->next_to_clean >= cnt) xdp_ring->next_to_clean -= cnt; if (xsk_frames) - xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames); + xsk_tx_completed(xsk_pool, xsk_frames); return completed_frames; } @@ -657,6 +673,7 @@ skip: * ice_xmit_xdp_tx_zc - AF_XDP ZC handler for XDP_TX * @xdp: XDP buffer to xmit * @xdp_ring: XDP ring to produce descriptor onto + * @xsk_pool: AF_XDP buffer pool pointer * * note that this function works directly on xdp_buff, no need to convert * it to xdp_frame. xdp_buff pointer is stored to ice_tx_buf so that cleaning @@ -666,7 +683,8 @@ skip: * was not enough space on XDP ring */ static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp, - struct ice_tx_ring *xdp_ring) + struct ice_tx_ring *xdp_ring, + struct xsk_buff_pool *xsk_pool) { struct skb_shared_info *sinfo = NULL; u32 size = xdp->data_end - xdp->data; @@ -680,7 +698,7 @@ static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp, free_space = ICE_DESC_UNUSED(xdp_ring); if (free_space < ICE_RING_QUARTER(xdp_ring)) - free_space += ice_clean_xdp_irq_zc(xdp_ring); + free_space += ice_clean_xdp_irq_zc(xdp_ring, xsk_pool); if (unlikely(!free_space)) goto busy; @@ -700,7 +718,7 @@ static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp, dma_addr_t dma; dma = xsk_buff_xdp_get_dma(xdp); - xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, size); + xsk_buff_raw_dma_sync_for_device(xsk_pool, dma, size); tx_buf->xdp = xdp; tx_buf->type = ICE_TX_BUF_XSK_TX; @@ -742,12 +760,14 @@ busy: * @xdp: xdp_buff used as input to the XDP program * @xdp_prog: XDP program to run * @xdp_ring: ring to be used for XDP_TX action + * @xsk_pool: AF_XDP buffer pool pointer * * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR} */ static int ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring) + struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring, + struct xsk_buff_pool *xsk_pool) { int err, result = ICE_XDP_PASS; u32 act; @@ -758,7 +778,7 @@ ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); if (!err) return ICE_XDP_REDIR; - if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS) + if (xsk_uses_need_wakeup(xsk_pool) && err == -ENOBUFS) result = ICE_XDP_EXIT; else result = ICE_XDP_CONSUMED; @@ -769,7 +789,7 @@ ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, case XDP_PASS: break; case XDP_TX: - result = ice_xmit_xdp_tx_zc(xdp, xdp_ring); + result = ice_xmit_xdp_tx_zc(xdp, xdp_ring, xsk_pool); if (result == ICE_XDP_CONSUMED) goto out_failure; break; @@ -821,14 +841,16 @@ ice_add_xsk_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *first, /** * ice_clean_rx_irq_zc - consumes packets from the hardware ring * @rx_ring: AF_XDP Rx ring + * @xsk_pool: AF_XDP buffer pool pointer * @budget: NAPI budget * * Returns number of processed packets on success, remaining budget on failure. */ -int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) +int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, + struct xsk_buff_pool *xsk_pool, + int budget) { unsigned int total_rx_bytes = 0, total_rx_packets = 0; - struct xsk_buff_pool *xsk_pool = rx_ring->xsk_pool; u32 ntc = rx_ring->next_to_clean; u32 ntu = rx_ring->next_to_use; struct xdp_buff *first = NULL; @@ -891,7 +913,8 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) if (ice_is_non_eop(rx_ring, rx_desc)) continue; - xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring); + xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring, + xsk_pool); if (likely(xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))) { xdp_xmit |= xdp_res; } else if (xdp_res == ICE_XDP_EXIT) { @@ -940,7 +963,8 @@ construct_skb: rx_ring->next_to_clean = ntc; entries_to_alloc = ICE_RX_DESC_UNUSED(rx_ring); if (entries_to_alloc > ICE_RING_QUARTER(rx_ring)) - failure |= !ice_alloc_rx_bufs_zc(rx_ring, entries_to_alloc); + failure |= !ice_alloc_rx_bufs_zc(rx_ring, xsk_pool, + entries_to_alloc); ice_finalize_xdp_rx(xdp_ring, xdp_xmit, 0); ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes); @@ -963,17 +987,19 @@ construct_skb: /** * ice_xmit_pkt - produce a single HW Tx descriptor out of AF_XDP descriptor * @xdp_ring: XDP ring to produce the HW Tx descriptor on + * @xsk_pool: XSK buffer pool to pick buffers to be consumed by HW * @desc: AF_XDP descriptor to pull the DMA address and length from * @total_bytes: bytes accumulator that will be used for stats update */ -static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc, +static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, + struct xsk_buff_pool *xsk_pool, struct xdp_desc *desc, unsigned int *total_bytes) { struct ice_tx_desc *tx_desc; dma_addr_t dma; - dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc->addr); - xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, desc->len); + dma = xsk_buff_raw_get_dma(xsk_pool, desc->addr); + xsk_buff_raw_dma_sync_for_device(xsk_pool, dma, desc->len); tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use++); tx_desc->buf_addr = cpu_to_le64(dma); @@ -986,10 +1012,13 @@ static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc, /** * ice_xmit_pkt_batch - produce a batch of HW Tx descriptors out of AF_XDP descriptors * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * @xsk_pool: XSK buffer pool to pick buffers to be consumed by HW * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from * @total_bytes: bytes accumulator that will be used for stats update */ -static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs, +static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, + struct xsk_buff_pool *xsk_pool, + struct xdp_desc *descs, unsigned int *total_bytes) { u16 ntu = xdp_ring->next_to_use; @@ -999,8 +1028,8 @@ static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *de loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) { dma_addr_t dma; - dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, descs[i].addr); - xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, descs[i].len); + dma = xsk_buff_raw_get_dma(xsk_pool, descs[i].addr); + xsk_buff_raw_dma_sync_for_device(xsk_pool, dma, descs[i].len); tx_desc = ICE_TX_DESC(xdp_ring, ntu++); tx_desc->buf_addr = cpu_to_le64(dma); @@ -1016,60 +1045,69 @@ static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *de /** * ice_fill_tx_hw_ring - produce the number of Tx descriptors onto ring * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * @xsk_pool: XSK buffer pool to pick buffers to be consumed by HW * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from * @nb_pkts: count of packets to be send * @total_bytes: bytes accumulator that will be used for stats update */ -static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs, - u32 nb_pkts, unsigned int *total_bytes) +static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, + struct xsk_buff_pool *xsk_pool, + struct xdp_desc *descs, u32 nb_pkts, + unsigned int *total_bytes) { u32 batched, leftover, i; batched = ALIGN_DOWN(nb_pkts, PKTS_PER_BATCH); leftover = nb_pkts & (PKTS_PER_BATCH - 1); for (i = 0; i < batched; i += PKTS_PER_BATCH) - ice_xmit_pkt_batch(xdp_ring, &descs[i], total_bytes); + ice_xmit_pkt_batch(xdp_ring, xsk_pool, &descs[i], total_bytes); for (; i < batched + leftover; i++) - ice_xmit_pkt(xdp_ring, &descs[i], total_bytes); + ice_xmit_pkt(xdp_ring, xsk_pool, &descs[i], total_bytes); } /** * ice_xmit_zc - take entries from XSK Tx ring and place them onto HW Tx ring * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * @xsk_pool: AF_XDP buffer pool pointer * * Returns true if there is no more work that needs to be done, false otherwise */ -bool ice_xmit_zc(struct ice_tx_ring *xdp_ring) +bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, struct xsk_buff_pool *xsk_pool) { - struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs; + struct xdp_desc *descs = xsk_pool->tx_descs; u32 nb_pkts, nb_processed = 0; unsigned int total_bytes = 0; int budget; - ice_clean_xdp_irq_zc(xdp_ring); + ice_clean_xdp_irq_zc(xdp_ring, xsk_pool); + + if (!netif_carrier_ok(xdp_ring->vsi->netdev) || + !netif_running(xdp_ring->vsi->netdev)) + return true; budget = ICE_DESC_UNUSED(xdp_ring); budget = min_t(u16, budget, ICE_RING_QUARTER(xdp_ring)); - nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget); + nb_pkts = xsk_tx_peek_release_desc_batch(xsk_pool, budget); if (!nb_pkts) return true; if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) { nb_processed = xdp_ring->count - xdp_ring->next_to_use; - ice_fill_tx_hw_ring(xdp_ring, descs, nb_processed, &total_bytes); + ice_fill_tx_hw_ring(xdp_ring, xsk_pool, descs, nb_processed, + &total_bytes); xdp_ring->next_to_use = 0; } - ice_fill_tx_hw_ring(xdp_ring, &descs[nb_processed], nb_pkts - nb_processed, - &total_bytes); + ice_fill_tx_hw_ring(xdp_ring, xsk_pool, &descs[nb_processed], + nb_pkts - nb_processed, &total_bytes); ice_set_rs_bit(xdp_ring); ice_xdp_ring_update_tail(xdp_ring); ice_update_tx_ring_stats(xdp_ring, nb_pkts, total_bytes); - if (xsk_uses_need_wakeup(xdp_ring->xsk_pool)) - xsk_set_tx_need_wakeup(xdp_ring->xsk_pool); + if (xsk_uses_need_wakeup(xsk_pool)) + xsk_set_tx_need_wakeup(xsk_pool); return nb_pkts < budget; } @@ -1091,7 +1129,7 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, struct ice_vsi *vsi = np->vsi; struct ice_tx_ring *ring; - if (test_bit(ICE_VSI_DOWN, vsi->state)) + if (test_bit(ICE_VSI_DOWN, vsi->state) || !netif_carrier_ok(netdev)) return -ENETDOWN; if (!ice_is_xdp_ena_vsi(vsi)) @@ -1102,7 +1140,7 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, ring = vsi->rx_rings[queue_id]->xdp_ring; - if (!ring->xsk_pool) + if (!READ_ONCE(ring->xsk_pool)) return -EINVAL; /* The idea here is that if NAPI is running, mark a miss, so diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h index 6fa181f080ef..45adeb513253 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.h +++ b/drivers/net/ethernet/intel/ice/ice_xsk.h @@ -20,16 +20,20 @@ struct ice_vsi; #ifdef CONFIG_XDP_SOCKETS int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid); -int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget); +int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, + struct xsk_buff_pool *xsk_pool, + int budget); int ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags); -bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count); +bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, + struct xsk_buff_pool *xsk_pool, u16 count); bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi); void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring); void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring); -bool ice_xmit_zc(struct ice_tx_ring *xdp_ring); +bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, struct xsk_buff_pool *xsk_pool); int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc); #else -static inline bool ice_xmit_zc(struct ice_tx_ring __always_unused *xdp_ring) +static inline bool ice_xmit_zc(struct ice_tx_ring __always_unused *xdp_ring, + struct xsk_buff_pool __always_unused *xsk_pool) { return false; } @@ -44,6 +48,7 @@ ice_xsk_pool_setup(struct ice_vsi __always_unused *vsi, static inline int ice_clean_rx_irq_zc(struct ice_rx_ring __always_unused *rx_ring, + struct xsk_buff_pool __always_unused *xsk_pool, int __always_unused budget) { return 0; @@ -51,6 +56,7 @@ ice_clean_rx_irq_zc(struct ice_rx_ring __always_unused *rx_ring, static inline bool ice_alloc_rx_bufs_zc(struct ice_rx_ring __always_unused *rx_ring, + struct xsk_buff_pool __always_unused *xsk_pool, u16 __always_unused count) { return false; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index cb5c7b09e8a0..8daf938afc36 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6306,21 +6306,6 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, size_t n; int i; - switch (qopt->cmd) { - case TAPRIO_CMD_REPLACE: - break; - case TAPRIO_CMD_DESTROY: - return igc_tsn_clear_schedule(adapter); - case TAPRIO_CMD_STATS: - igc_taprio_stats(adapter->netdev, &qopt->stats); - return 0; - case TAPRIO_CMD_QUEUE_STATS: - igc_taprio_queue_stats(adapter->netdev, &qopt->queue_stats); - return 0; - default: - return -EOPNOTSUPP; - } - if (qopt->base_time < 0) return -ERANGE; @@ -6429,7 +6414,23 @@ static int igc_tsn_enable_qbv_scheduling(struct igc_adapter *adapter, if (hw->mac.type != igc_i225) return -EOPNOTSUPP; - err = igc_save_qbv_schedule(adapter, qopt); + switch (qopt->cmd) { + case TAPRIO_CMD_REPLACE: + err = igc_save_qbv_schedule(adapter, qopt); + break; + case TAPRIO_CMD_DESTROY: + err = igc_tsn_clear_schedule(adapter); + break; + case TAPRIO_CMD_STATS: + igc_taprio_stats(adapter->netdev, &qopt->stats); + return 0; + case TAPRIO_CMD_QUEUE_STATS: + igc_taprio_queue_stats(adapter->netdev, &qopt->queue_stats); + return 0; + default: + return -EOPNOTSUPP; + } + if (err) return err; diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 8c45ad983abc..0d62a33afa80 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -953,13 +953,13 @@ static void mvpp2_bm_pool_update_fc(struct mvpp2_port *port, static void mvpp2_bm_pool_update_priv_fc(struct mvpp2 *priv, bool en) { struct mvpp2_port *port; - int i; + int i, j; for (i = 0; i < priv->port_count; i++) { port = priv->port_list[i]; if (port->priv->percpu_pools) { - for (i = 0; i < port->nrxqs; i++) - mvpp2_bm_pool_update_fc(port, &port->priv->bm_pools[i], + for (j = 0; j < port->nrxqs; j++) + mvpp2_bm_pool_update_fc(port, &port->priv->bm_pools[j], port->tx_fc & en); } else { mvpp2_bm_pool_update_fc(port, port->pool_long, port->tx_fc & en); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 8cf8ba2622f2..71a168746ebe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -932,6 +932,7 @@ err_rule: mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, mh); mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); err_mod_hdr: + *attr = *old_attr; kfree(old_attr); err_attr: kvfree(spec); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index 6e00afe4671b..797db853de36 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -51,9 +51,10 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) MLX5_CAP_FLOWTABLE_NIC_RX(mdev, decap)) caps |= MLX5_IPSEC_CAP_PACKET_OFFLOAD; - if ((MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ignore_flow_level) && - MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ignore_flow_level)) || - MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, ignore_flow_level)) + if (IS_ENABLED(CONFIG_MLX5_CLS_ACT) && + ((MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ignore_flow_level) && + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ignore_flow_level)) || + MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, ignore_flow_level))) caps |= MLX5_IPSEC_CAP_PRIO; if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 00d5661dc62e..36845872ae94 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1409,7 +1409,12 @@ static int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, if (!an_changes && link_modes == eproto.admin) goto out; - mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, ext); + err = mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, ext); + if (err) { + netdev_err(priv->netdev, "%s: failed to set ptys reg: %d\n", __func__, err); + goto out; + } + mlx5_toggle_port_link(mdev); out: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 979c49ae6b5c..b43ca0b762c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -207,6 +207,7 @@ int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev) static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev, bool unloaded) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + struct devlink *devlink = priv_to_devlink(dev); /* if this is the driver that initiated the fw reset, devlink completed the reload */ if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) { @@ -218,9 +219,11 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev, bool unload mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n"); else mlx5_load_one(dev, true); - devlink_remote_reload_actions_performed(priv_to_devlink(dev), 0, + devl_lock(devlink); + devlink_remote_reload_actions_performed(devlink, 0, BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE)); + devl_unlock(devlink); } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c index f7b01b3f0cba..1477db7f5307 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c @@ -48,6 +48,7 @@ static struct mlx5_irq * irq_pool_request_irq(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_desc) { struct irq_affinity_desc auto_desc = {}; + struct mlx5_irq *irq; u32 irq_index; int err; @@ -64,9 +65,12 @@ irq_pool_request_irq(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_de else cpu_get(pool, cpumask_first(&af_desc->mask)); } - return mlx5_irq_alloc(pool, irq_index, - cpumask_empty(&auto_desc.mask) ? af_desc : &auto_desc, - NULL); + irq = mlx5_irq_alloc(pool, irq_index, + cpumask_empty(&auto_desc.mask) ? af_desc : &auto_desc, + NULL); + if (IS_ERR(irq)) + xa_erase(&pool->irqs, irq_index); + return irq; } /* Looking for the IRQ with the smallest refcount that fits req_mask. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index d0871c46b8c5..cf8045b92689 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -1538,7 +1538,7 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, goto unlock; for (i = 0; i < ldev->ports; i++) { - if (ldev->pf[MLX5_LAG_P1].netdev == slave) { + if (ldev->pf[i].netdev == slave) { port = i; break; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 527da58c7953..5b7e6f4b5c7e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -2142,7 +2142,6 @@ static int mlx5_try_fast_unload(struct mlx5_core_dev *dev) /* Panic tear down fw command will stop the PCI bus communication * with the HCA, so the health poll is no longer needed. */ - mlx5_drain_health_wq(dev); mlx5_stop_health_poll(dev, false); ret = mlx5_cmd_fast_teardown_hca(dev); @@ -2177,6 +2176,7 @@ static void shutdown(struct pci_dev *pdev) mlx5_core_info(dev, "Shutdown was called\n"); set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state); + mlx5_drain_health_wq(dev); err = mlx5_try_fast_unload(dev); if (err) mlx5_unload_one(dev, false); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c index b2986175d9af..b706f1486504 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c @@ -112,6 +112,7 @@ static void mlx5_sf_dev_shutdown(struct auxiliary_device *adev) struct mlx5_core_dev *mdev = sf_dev->mdev; set_bit(MLX5_BREAK_FW_WAIT, &mdev->intf_state); + mlx5_drain_health_wq(mdev); mlx5_unload_one(mdev, false); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c index 042ca0349124..d1db04baa1fa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c @@ -7,7 +7,7 @@ /* don't try to optimize STE allocation if the stack is too constaraining */ #define DR_RULE_MAX_STES_OPTIMIZED 0 #else -#define DR_RULE_MAX_STES_OPTIMIZED 5 +#define DR_RULE_MAX_STES_OPTIMIZED 2 #endif #define DR_RULE_MAX_STE_CHAIN_OPTIMIZED (DR_RULE_MAX_STES_OPTIMIZED + DR_ACTION_MAX_STES) diff --git a/drivers/net/ethernet/meta/Kconfig b/drivers/net/ethernet/meta/Kconfig index 86034ea4ba5b..c002ede36402 100644 --- a/drivers/net/ethernet/meta/Kconfig +++ b/drivers/net/ethernet/meta/Kconfig @@ -20,7 +20,7 @@ if NET_VENDOR_META config FBNIC tristate "Meta Platforms Host Network Interface" depends on X86_64 || COMPILE_TEST - depends on S390=n + depends on !S390 depends on MAX_SKB_FRAGS < 22 depends on PCI_MSI select PHYLINK diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 714d2e804694..3507c2e28110 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4349,7 +4349,8 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, if (unlikely(!rtl_tx_slots_avail(tp))) { if (net_ratelimit()) netdev_err(dev, "BUG! Tx Ring full when queue awake!\n"); - goto err_stop_0; + netif_stop_queue(dev); + return NETDEV_TX_BUSY; } opts[1] = rtl8169_tx_vlan_tag(skb); @@ -4405,11 +4406,6 @@ err_dma_0: dev_kfree_skb_any(skb); dev->stats.tx_dropped++; return NETDEV_TX_OK; - -err_stop_0: - netif_stop_queue(dev); - dev->stats.tx_dropped++; - return NETDEV_TX_BUSY; } static unsigned int rtl_last_frag_len(struct sk_buff *skb) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index e342f387c3dd..02fdf66e07fa 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -2219,9 +2219,9 @@ static void axienet_dma_err_handler(struct work_struct *work) ~(XAE_OPTION_TXEN | XAE_OPTION_RXEN)); axienet_set_mac_address(ndev, NULL); axienet_set_multicast_list(ndev); - axienet_setoptions(ndev, lp->options); napi_enable(&lp->napi_rx); napi_enable(&lp->napi_tx); + axienet_setoptions(ndev, lp->options); } /** diff --git a/drivers/net/phy/aquantia/aquantia_main.c b/drivers/net/phy/aquantia/aquantia_main.c index d12e35374231..e982e9ce44a5 100644 --- a/drivers/net/phy/aquantia/aquantia_main.c +++ b/drivers/net/phy/aquantia/aquantia_main.c @@ -653,13 +653,7 @@ static int aqr107_fill_interface_modes(struct phy_device *phydev) unsigned long *possible = phydev->possible_interfaces; unsigned int serdes_mode, rate_adapt; phy_interface_t interface; - int i, val, ret; - - ret = phy_read_mmd_poll_timeout(phydev, MDIO_MMD_VEND1, - VEND1_GLOBAL_CFG_10M, val, val != 0, - 1000, 100000, false); - if (ret) - return ret; + int i, val; /* Walk the media-speed configuration registers to determine which * host-side serdes modes may be used by the PHY depending on the @@ -708,6 +702,25 @@ static int aqr107_fill_interface_modes(struct phy_device *phydev) return 0; } +static int aqr113c_fill_interface_modes(struct phy_device *phydev) +{ + int val, ret; + + /* It's been observed on some models that - when coming out of suspend + * - the FW signals that the PHY is ready but the GLOBAL_CFG registers + * continue on returning zeroes for some time. Let's poll the 100M + * register until it returns a real value as both 113c and 115c support + * this mode. + */ + ret = phy_read_mmd_poll_timeout(phydev, MDIO_MMD_VEND1, + VEND1_GLOBAL_CFG_100M, val, val != 0, + 1000, 100000, false); + if (ret) + return ret; + + return aqr107_fill_interface_modes(phydev); +} + static int aqr113c_config_init(struct phy_device *phydev) { int ret; @@ -725,7 +738,7 @@ static int aqr113c_config_init(struct phy_device *phydev) if (ret) return ret; - return aqr107_fill_interface_modes(phydev); + return aqr113c_fill_interface_modes(phydev); } static int aqr107_probe(struct phy_device *phydev) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index dd519805deee..65b0a3115e14 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -1389,6 +1389,8 @@ static int ksz9131_config_init(struct phy_device *phydev) const struct device *dev_walker; int ret; + phydev->mdix_ctrl = ETH_TP_MDI_AUTO; + dev_walker = &phydev->mdio.dev; do { of_node = dev_walker->of_node; @@ -1438,28 +1440,30 @@ static int ksz9131_config_init(struct phy_device *phydev) #define MII_KSZ9131_AUTO_MDIX 0x1C #define MII_KSZ9131_AUTO_MDI_SET BIT(7) #define MII_KSZ9131_AUTO_MDIX_SWAP_OFF BIT(6) +#define MII_KSZ9131_DIG_AXAN_STS 0x14 +#define MII_KSZ9131_DIG_AXAN_STS_LINK_DET BIT(14) +#define MII_KSZ9131_DIG_AXAN_STS_A_SELECT BIT(12) static int ksz9131_mdix_update(struct phy_device *phydev) { int ret; - ret = phy_read(phydev, MII_KSZ9131_AUTO_MDIX); - if (ret < 0) - return ret; - - if (ret & MII_KSZ9131_AUTO_MDIX_SWAP_OFF) { - if (ret & MII_KSZ9131_AUTO_MDI_SET) - phydev->mdix_ctrl = ETH_TP_MDI; - else - phydev->mdix_ctrl = ETH_TP_MDI_X; + if (phydev->mdix_ctrl != ETH_TP_MDI_AUTO) { + phydev->mdix = phydev->mdix_ctrl; } else { - phydev->mdix_ctrl = ETH_TP_MDI_AUTO; - } + ret = phy_read(phydev, MII_KSZ9131_DIG_AXAN_STS); + if (ret < 0) + return ret; - if (ret & MII_KSZ9131_AUTO_MDI_SET) - phydev->mdix = ETH_TP_MDI; - else - phydev->mdix = ETH_TP_MDI_X; + if (ret & MII_KSZ9131_DIG_AXAN_STS_LINK_DET) { + if (ret & MII_KSZ9131_DIG_AXAN_STS_A_SELECT) + phydev->mdix = ETH_TP_MDI; + else + phydev->mdix = ETH_TP_MDI_X; + } else { + phydev->mdix = ETH_TP_MDI_INVALID; + } + } return 0; } diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index bed839237fb5..87865918dab6 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -1465,6 +1465,13 @@ static struct phy_driver realtek_drvs[] = { .handle_interrupt = genphy_handle_interrupt_no_ack, .suspend = genphy_suspend, .resume = genphy_resume, + }, { + PHY_ID_MATCH_EXACT(0x001cc960), + .name = "RTL8366S Gigabit Ethernet", + .suspend = genphy_suspend, + .resume = genphy_resume, + .read_mmd = genphy_read_mmd_unsupported, + .write_mmd = genphy_write_mmd_unsupported, }, }; diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c index 0a662e42ed96..cb7d2f798fb4 100644 --- a/drivers/net/usb/sr9700.c +++ b/drivers/net/usb/sr9700.c @@ -179,6 +179,7 @@ static int sr_mdio_read(struct net_device *netdev, int phy_id, int loc) struct usbnet *dev = netdev_priv(netdev); __le16 res; int rc = 0; + int err; if (phy_id) { netdev_dbg(netdev, "Only internal phy supported\n"); @@ -189,11 +190,17 @@ static int sr_mdio_read(struct net_device *netdev, int phy_id, int loc) if (loc == MII_BMSR) { u8 value; - sr_read_reg(dev, SR_NSR, &value); + err = sr_read_reg(dev, SR_NSR, &value); + if (err < 0) + return err; + if (value & NSR_LINKST) rc = 1; } - sr_share_read_word(dev, 1, loc, &res); + err = sr_share_read_word(dev, 1, loc, &res); + if (err < 0) + return err; + if (rc == 1) res = le16_to_cpu(res) | BMSR_LSTATUS; else diff --git a/drivers/net/wan/fsl_qmc_hdlc.c b/drivers/net/wan/fsl_qmc_hdlc.c index c5e7ca793c43..8fcfbde31a1c 100644 --- a/drivers/net/wan/fsl_qmc_hdlc.c +++ b/drivers/net/wan/fsl_qmc_hdlc.c @@ -18,6 +18,7 @@ #include <linux/hdlc.h> #include <linux/mod_devicetable.h> #include <linux/module.h> +#include <linux/mutex.h> #include <linux/platform_device.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -37,7 +38,7 @@ struct qmc_hdlc { struct qmc_chan *qmc_chan; struct net_device *netdev; struct framer *framer; - spinlock_t carrier_lock; /* Protect carrier detection */ + struct mutex carrier_lock; /* Protect carrier detection */ struct notifier_block nb; bool is_crc32; spinlock_t tx_lock; /* Protect tx descriptors */ @@ -60,7 +61,7 @@ static int qmc_hdlc_framer_set_carrier(struct qmc_hdlc *qmc_hdlc) if (!qmc_hdlc->framer) return 0; - guard(spinlock_irqsave)(&qmc_hdlc->carrier_lock); + guard(mutex)(&qmc_hdlc->carrier_lock); ret = framer_get_status(qmc_hdlc->framer, &framer_status); if (ret) { @@ -249,6 +250,7 @@ static void qmc_hcld_recv_complete(void *context, size_t length, unsigned int fl struct qmc_hdlc_desc *desc = context; struct net_device *netdev; struct qmc_hdlc *qmc_hdlc; + size_t crc_size; int ret; netdev = desc->netdev; @@ -267,15 +269,26 @@ static void qmc_hcld_recv_complete(void *context, size_t length, unsigned int fl if (flags & QMC_RX_FLAG_HDLC_CRC) /* CRC error */ netdev->stats.rx_crc_errors++; kfree_skb(desc->skb); - } else { - netdev->stats.rx_packets++; - netdev->stats.rx_bytes += length; + goto re_queue; + } - skb_put(desc->skb, length); - desc->skb->protocol = hdlc_type_trans(desc->skb, netdev); - netif_rx(desc->skb); + /* Discard the CRC */ + crc_size = qmc_hdlc->is_crc32 ? 4 : 2; + if (length < crc_size) { + netdev->stats.rx_length_errors++; + kfree_skb(desc->skb); + goto re_queue; } + length -= crc_size; + + netdev->stats.rx_packets++; + netdev->stats.rx_bytes += length; + + skb_put(desc->skb, length); + desc->skb->protocol = hdlc_type_trans(desc->skb, netdev); + netif_rx(desc->skb); +re_queue: /* Re-queue a transfer using the same descriptor */ ret = qmc_hdlc_recv_queue(qmc_hdlc, desc, desc->dma_size); if (ret) { @@ -706,7 +719,7 @@ static int qmc_hdlc_probe(struct platform_device *pdev) qmc_hdlc->dev = dev; spin_lock_init(&qmc_hdlc->tx_lock); - spin_lock_init(&qmc_hdlc->carrier_lock); + mutex_init(&qmc_hdlc->carrier_lock); qmc_hdlc->qmc_chan = devm_qmc_chan_get_bychild(dev, dev->of_node); if (IS_ERR(qmc_hdlc->qmc_chan)) diff --git a/drivers/net/wireless/ath/ath12k/pci.c b/drivers/net/wireless/ath/ath12k/pci.c index 876c029f58f6..9e0b9e329bda 100644 --- a/drivers/net/wireless/ath/ath12k/pci.c +++ b/drivers/net/wireless/ath/ath12k/pci.c @@ -473,7 +473,8 @@ static void __ath12k_pci_ext_irq_disable(struct ath12k_base *ab) { int i; - clear_bit(ATH12K_FLAG_EXT_IRQ_ENABLED, &ab->dev_flags); + if (!test_and_clear_bit(ATH12K_FLAG_EXT_IRQ_ENABLED, &ab->dev_flags)) + return; for (i = 0; i < ATH12K_EXT_IRQ_GRP_NUM_MAX; i++) { struct ath12k_ext_irq_grp *irq_grp = &ab->ext_irq_grp[i]; diff --git a/drivers/net/wireless/ath/ath12k/wow.c b/drivers/net/wireless/ath/ath12k/wow.c index bead19db2c9a..9b8684abbe40 100644 --- a/drivers/net/wireless/ath/ath12k/wow.c +++ b/drivers/net/wireless/ath/ath12k/wow.c @@ -361,7 +361,7 @@ static int ath12k_wow_vif_set_wakeups(struct ath12k_vif *arvif, struct ath12k *ar = arvif->ar; unsigned long wow_mask = 0; int pattern_id = 0; - int ret, i; + int ret, i, j; /* Setup requested WOW features */ switch (arvif->vdev_type) { @@ -431,9 +431,9 @@ static int ath12k_wow_vif_set_wakeups(struct ath12k_vif *arvif, eth_pattern->pattern_len); /* convert bitmask to bytemask */ - for (i = 0; i < eth_pattern->pattern_len; i++) - if (eth_pattern->mask[i / 8] & BIT(i % 8)) - new_pattern.bytemask[i] = 0xff; + for (j = 0; j < eth_pattern->pattern_len; j++) + if (eth_pattern->mask[j / 8] & BIT(j % 8)) + new_pattern.bytemask[j] = 0xff; new_pattern.pattern_len = eth_pattern->pattern_len; new_pattern.pkt_offset = eth_pattern->pkt_offset; diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c index 2e6268cb06c0..1bab93d049df 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c @@ -303,6 +303,7 @@ mt7921_add_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif) mvif->bss_conf.mt76.omac_idx = mvif->bss_conf.mt76.idx; mvif->phy = phy; + mvif->bss_conf.vif = mvif; mvif->bss_conf.mt76.band_idx = 0; mvif->bss_conf.mt76.wmm_idx = mvif->bss_conf.mt76.idx % MT76_CONNAC_MAX_WMM_SETS; diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 061f01f60db4..736ad8baa2a5 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -485,7 +485,9 @@ int pciehp_set_raw_indicator_status(struct hotplug_slot *hotplug_slot, struct pci_dev *pdev = ctrl_dev(ctrl); pci_config_pm_runtime_get(pdev); - pcie_write_cmd_nowait(ctrl, FIELD_PREP(PCI_EXP_SLTCTL_AIC, status), + + /* Attention and Power Indicator Control bits are supported */ + pcie_write_cmd_nowait(ctrl, FIELD_PREP(PCI_EXP_SLTCTL_AIC | PCI_EXP_SLTCTL_PIC, status), PCI_EXP_SLTCTL_AIC | PCI_EXP_SLTCTL_PIC); pci_config_pm_runtime_put(pdev); return 0; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e3a49f66982d..ffaaca0978cb 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4477,12 +4477,6 @@ void pci_intx(struct pci_dev *pdev, int enable) { u16 pci_command, new; - /* Preserve the "hybrid" behavior for backwards compatibility */ - if (pci_is_managed(pdev)) { - WARN_ON_ONCE(pcim_intx(pdev, enable) != 0); - return; - } - pci_read_config_word(pdev, PCI_COMMAND, &pci_command); if (enable) @@ -4490,8 +4484,15 @@ void pci_intx(struct pci_dev *pdev, int enable) else new = pci_command | PCI_COMMAND_INTX_DISABLE; - if (new != pci_command) + if (new != pci_command) { + /* Preserve the "hybrid" behavior for backwards compatibility */ + if (pci_is_managed(pdev)) { + WARN_ON_ONCE(pcim_intx(pdev, enable) != 0); + return; + } + pci_write_config_word(pdev, PCI_COMMAND, new); + } } EXPORT_SYMBOL_GPL(pci_intx); diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index 44d3951d009f..31a17a56eb3b 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -416,7 +416,7 @@ static int pmu_sbi_ctr_get_idx(struct perf_event *event) * but not in the user access mode as we want to use the other counters * that support sampling/filtering. */ - if (hwc->flags & PERF_EVENT_FLAG_LEGACY) { + if ((hwc->flags & PERF_EVENT_FLAG_LEGACY) && (event->attr.type == PERF_TYPE_HARDWARE)) { if (event->attr.config == PERF_COUNT_HW_CPU_CYCLES) { cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH; cmask = 1; diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c index f776fd42244f..73f75958e15c 100644 --- a/drivers/platform/chrome/cros_ec_proto.c +++ b/drivers/platform/chrome/cros_ec_proto.c @@ -813,9 +813,11 @@ int cros_ec_get_next_event(struct cros_ec_device *ec_dev, if (ret == -ENOPROTOOPT) { dev_dbg(ec_dev->dev, "GET_NEXT_EVENT returned invalid version error.\n"); + mutex_lock(&ec_dev->lock); ret = cros_ec_get_host_command_version_mask(ec_dev, EC_CMD_GET_NEXT_EVENT, &ver_mask); + mutex_unlock(&ec_dev->lock); if (ret < 0 || ver_mask == 0) /* * Do not change the MKBP supported version if we can't diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c index b72f672a7720..66b1bdc63284 100644 --- a/drivers/s390/cio/ccwgroup.c +++ b/drivers/s390/cio/ccwgroup.c @@ -550,4 +550,5 @@ void ccwgroup_remove_ccwdev(struct ccw_device *cdev) put_device(&gdev->dev); } EXPORT_SYMBOL(ccwgroup_remove_ccwdev); +MODULE_DESCRIPTION("ccwgroup bus driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index 8ad49030a7bf..914dde041675 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -488,4 +488,5 @@ static void __exit vfio_ccw_sch_exit(void) module_init(vfio_ccw_sch_init); module_exit(vfio_ccw_sch_exit); +MODULE_DESCRIPTION("VFIO based Subchannel device driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c index 69b14918de59..ca8f132e03ae 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_os.c +++ b/drivers/scsi/mpi3mr/mpi3mr_os.c @@ -3575,6 +3575,17 @@ static int mpi3mr_prepare_sg_scmd(struct mpi3mr_ioc *mrioc, scmd->sc_data_direction); priv->meta_sg_valid = 1; /* To unmap meta sg DMA */ } else { + /* + * Some firmware versions byte-swap the REPORT ZONES command + * reply from ATA-ZAC devices by directly accessing in the host + * buffer. This does not respect the default command DMA + * direction and causes IOMMU page faults on some architectures + * with an IOMMU enforcing write mappings (e.g. AMD hosts). + * Avoid such issue by making the REPORT ZONES buffer mapping + * bi-directional. + */ + if (scmd->cmnd[0] == ZBC_IN && scmd->cmnd[1] == ZI_REPORT_ZONES) + scmd->sc_data_direction = DMA_BIDIRECTIONAL; sg_scmd = scsi_sglist(scmd); sges_left = scsi_dma_map(scmd); } diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index b2bcf4a27ddc..b785a7e88b49 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -2671,6 +2671,22 @@ _base_build_zero_len_sge_ieee(struct MPT3SAS_ADAPTER *ioc, void *paddr) _base_add_sg_single_ieee(paddr, sgl_flags, 0, 0, -1); } +static inline int _base_scsi_dma_map(struct scsi_cmnd *cmd) +{ + /* + * Some firmware versions byte-swap the REPORT ZONES command reply from + * ATA-ZAC devices by directly accessing in the host buffer. This does + * not respect the default command DMA direction and causes IOMMU page + * faults on some architectures with an IOMMU enforcing write mappings + * (e.g. AMD hosts). Avoid such issue by making the report zones buffer + * mapping bi-directional. + */ + if (cmd->cmnd[0] == ZBC_IN && cmd->cmnd[1] == ZI_REPORT_ZONES) + cmd->sc_data_direction = DMA_BIDIRECTIONAL; + + return scsi_dma_map(cmd); +} + /** * _base_build_sg_scmd - main sg creation routine * pcie_device is unused here! @@ -2717,7 +2733,7 @@ _base_build_sg_scmd(struct MPT3SAS_ADAPTER *ioc, sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT; sg_scmd = scsi_sglist(scmd); - sges_left = scsi_dma_map(scmd); + sges_left = _base_scsi_dma_map(scmd); if (sges_left < 0) return -ENOMEM; @@ -2861,7 +2877,7 @@ _base_build_sg_scmd_ieee(struct MPT3SAS_ADAPTER *ioc, } sg_scmd = scsi_sglist(scmd); - sges_left = scsi_dma_map(scmd); + sges_left = _base_scsi_dma_map(scmd); if (sges_left < 0) return -ENOMEM; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index adeaa8ab9951..8bb3a3611851 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -4205,6 +4205,8 @@ static int sd_resume(struct device *dev) { struct scsi_disk *sdkp = dev_get_drvdata(dev); + sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); + if (opal_unlock_from_suspend(sdkp->opal_dev)) { sd_printk(KERN_NOTICE, sdkp, "OPAL unlock failed\n"); return -EIO; @@ -4221,13 +4223,12 @@ static int sd_resume_common(struct device *dev, bool runtime) if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */ return 0; - sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); - if (!sd_do_start_stop(sdkp->device, runtime)) { sdkp->suspended = false; return 0; } + sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); ret = sd_start_stop_device(sdkp, 1); if (!ret) { sd_resume(dev); diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c index 114136893a59..006614921870 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c @@ -278,20 +278,32 @@ static struct thermal_zone_params tzone_params = { static bool msi_irq; +static void proc_thermal_free_msi(struct pci_dev *pdev, struct proc_thermal_pci *pci_info) +{ + int i; + + for (i = 0; i < MSI_THERMAL_MAX; i++) { + if (proc_thermal_msi_map[i]) + devm_free_irq(&pdev->dev, proc_thermal_msi_map[i], pci_info); + } + + pci_free_irq_vectors(pdev); +} + static int proc_thermal_setup_msi(struct pci_dev *pdev, struct proc_thermal_pci *pci_info) { - int ret, i, irq; + int ret, i, irq, count; - ret = pci_alloc_irq_vectors(pdev, 1, MSI_THERMAL_MAX, PCI_IRQ_MSI | PCI_IRQ_MSIX); - if (ret < 0) { + count = pci_alloc_irq_vectors(pdev, 1, MSI_THERMAL_MAX, PCI_IRQ_MSI | PCI_IRQ_MSIX); + if (count < 0) { dev_err(&pdev->dev, "Failed to allocate vectors!\n"); - return ret; + return count; } dev_info(&pdev->dev, "msi enabled:%d msix enabled:%d\n", pdev->msi_enabled, pdev->msix_enabled); - for (i = 0; i < MSI_THERMAL_MAX; i++) { + for (i = 0; i < count; i++) { irq = pci_irq_vector(pdev, i); ret = devm_request_threaded_irq(&pdev->dev, irq, proc_thermal_irq_handler, @@ -310,7 +322,7 @@ static int proc_thermal_setup_msi(struct pci_dev *pdev, struct proc_thermal_pci return 0; err_free_msi_vectors: - pci_free_irq_vectors(pdev); + proc_thermal_free_msi(pdev, pci_info); return ret; } @@ -397,7 +409,7 @@ static int proc_thermal_pci_probe(struct pci_dev *pdev, const struct pci_device_ err_free_vectors: if (msi_irq) - pci_free_irq_vectors(pdev); + proc_thermal_free_msi(pdev, pci_info); err_ret_tzone: thermal_zone_device_unregister(pci_info->tzone); err_del_legacy: @@ -419,6 +431,9 @@ static void proc_thermal_pci_remove(struct pci_dev *pdev) proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_THRES_0, 0); proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_INT_ENABLE_0, 0); + if (msi_irq) + proc_thermal_free_msi(pdev, pci_info); + thermal_zone_device_unregister(pci_info->tzone); proc_thermal_mmio_remove(pdev, pci_info->proc_priv); if (!pci_info->no_legacy) diff --git a/drivers/thermal/thermal_trip.c b/drivers/thermal/thermal_trip.c index c0b679b846b3..06a0554ddc38 100644 --- a/drivers/thermal/thermal_trip.c +++ b/drivers/thermal/thermal_trip.c @@ -88,10 +88,10 @@ void thermal_zone_set_trips(struct thermal_zone_device *tz) return; for_each_trip_desc(tz, td) { - if (td->threshold < tz->temperature && td->threshold > low) + if (td->threshold <= tz->temperature && td->threshold > low) low = td->threshold; - if (td->threshold > tz->temperature && td->threshold < high) + if (td->threshold >= tz->temperature && td->threshold < high) high = td->threshold; } diff --git a/drivers/ufs/core/ufshcd-priv.h b/drivers/ufs/core/ufshcd-priv.h index ce36154ce963..7aea8fbaeee8 100644 --- a/drivers/ufs/core/ufshcd-priv.h +++ b/drivers/ufs/core/ufshcd-priv.h @@ -316,6 +316,11 @@ static inline int ufshcd_rpm_get_sync(struct ufs_hba *hba) return pm_runtime_get_sync(&hba->ufs_device_wlun->sdev_gendev); } +static inline int ufshcd_rpm_get_if_active(struct ufs_hba *hba) +{ + return pm_runtime_get_if_active(&hba->ufs_device_wlun->sdev_gendev); +} + static inline int ufshcd_rpm_put_sync(struct ufs_hba *hba) { return pm_runtime_put_sync(&hba->ufs_device_wlun->sdev_gendev); diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index dc757ba47522..5e3c67e96956 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -2416,7 +2416,17 @@ static inline int ufshcd_hba_capabilities(struct ufs_hba *hba) return err; } + /* + * The UFSHCI 3.0 specification does not define MCQ_SUPPORT and + * LSDB_SUPPORT, but [31:29] as reserved bits with reset value 0s, which + * means we can simply read values regardless of version. + */ hba->mcq_sup = FIELD_GET(MASK_MCQ_SUPPORT, hba->capabilities); + /* + * 0h: legacy single doorbell support is available + * 1h: indicate that legacy single doorbell support has been removed + */ + hba->lsdb_sup = !FIELD_GET(MASK_LSDB_SUPPORT, hba->capabilities); if (!hba->mcq_sup) return 0; @@ -6553,7 +6563,8 @@ again: if (ufshcd_err_handling_should_stop(hba)) goto skip_err_handling; - if (hba->dev_quirks & UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS) { + if ((hba->dev_quirks & UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS) && + !hba->force_reset) { bool ret; spin_unlock_irqrestore(hba->host->host_lock, flags); @@ -8211,7 +8222,10 @@ static void ufshcd_update_rtc(struct ufs_hba *hba) */ val = ts64.tv_sec - hba->dev_info.rtc_time_baseline; - ufshcd_rpm_get_sync(hba); + /* Skip update RTC if RPM state is not RPM_ACTIVE */ + if (ufshcd_rpm_get_if_active(hba) <= 0) + return; + err = ufshcd_query_attr(hba, UPIU_QUERY_OPCODE_WRITE_ATTR, QUERY_ATTR_IDN_SECONDS_PASSED, 0, 0, &val); ufshcd_rpm_put_sync(hba); @@ -10265,9 +10279,6 @@ int ufshcd_system_restore(struct device *dev) */ ufshcd_readl(hba, REG_UTP_TASK_REQ_LIST_BASE_H); - /* Resuming from hibernate, assume that link was OFF */ - ufshcd_set_link_off(hba); - return 0; } @@ -10496,6 +10507,12 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) } if (!is_mcq_supported(hba)) { + if (!hba->lsdb_sup) { + dev_err(hba->dev, "%s: failed to initialize (legacy doorbell mode not supported)\n", + __func__); + err = -EINVAL; + goto out_disable; + } err = scsi_add_host(host, hba->dev); if (err) { dev_err(hba->dev, "scsi_add_host failed\n"); diff --git a/drivers/ufs/host/ufs-exynos.c b/drivers/ufs/host/ufs-exynos.c index 16ad3528d80b..9ec318ef52bf 100644 --- a/drivers/ufs/host/ufs-exynos.c +++ b/drivers/ufs/host/ufs-exynos.c @@ -1293,6 +1293,9 @@ static void exynos_ufs_fmp_resume(struct ufs_hba *hba) { struct arm_smccc_res res; + if (!(hba->caps & UFSHCD_CAP_CRYPTO)) + return; + arm_smccc_smc(SMC_CMD_FMP_SECURITY, 0, SMU_EMBEDDED, CFG_DESCTYPE_3, 0, 0, 0, 0, &res); if (res.a0) diff --git a/drivers/vdpa/octeon_ep/octep_vdpa_hw.c b/drivers/vdpa/octeon_ep/octep_vdpa_hw.c index 7fa0491bb201..11bd76ae18cf 100644 --- a/drivers/vdpa/octeon_ep/octep_vdpa_hw.c +++ b/drivers/vdpa/octeon_ep/octep_vdpa_hw.c @@ -140,7 +140,7 @@ static int octep_process_mbox(struct octep_hw *oct_hw, u16 id, u16 qid, void *bu val = octep_read_sig(mbox); if ((val & 0xFFFF) != MBOX_RSP_SIG) { dev_warn(&pdev->dev, "Invalid Signature from mbox : %d response\n", id); - return ret; + return -EINVAL; } val = octep_read_sts(mbox); diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index a9b93e99c23a..bc1f962e483b 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -305,15 +305,9 @@ static int virtio_dev_probe(struct device *_d) if (err) goto err; - if (dev->config->create_avq) { - err = dev->config->create_avq(dev); - if (err) - goto err; - } - err = drv->probe(dev); if (err) - goto err_probe; + goto err; /* If probe didn't do it, mark device DRIVER_OK ourselves. */ if (!(dev->config->get_status(dev) & VIRTIO_CONFIG_S_DRIVER_OK)) @@ -326,9 +320,6 @@ static int virtio_dev_probe(struct device *_d) return 0; -err_probe: - if (dev->config->destroy_avq) - dev->config->destroy_avq(dev); err: virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED); return err; @@ -344,9 +335,6 @@ static void virtio_dev_remove(struct device *_d) drv->remove(dev); - if (dev->config->destroy_avq) - dev->config->destroy_avq(dev); - /* Driver should have reset device. */ WARN_ON_ONCE(dev->config->get_status(dev)); @@ -524,9 +512,6 @@ int virtio_device_freeze(struct virtio_device *dev) } } - if (dev->config->destroy_avq) - dev->config->destroy_avq(dev); - return 0; } EXPORT_SYMBOL_GPL(virtio_device_freeze); @@ -562,16 +547,10 @@ int virtio_device_restore(struct virtio_device *dev) if (ret) goto err; - if (dev->config->create_avq) { - ret = dev->config->create_avq(dev); - if (ret) - goto err; - } - if (drv->restore) { ret = drv->restore(dev); if (ret) - goto err_restore; + goto err; } /* If restore didn't do it, mark device DRIVER_OK ourselves. */ @@ -582,9 +561,6 @@ int virtio_device_restore(struct virtio_device *dev) return 0; -err_restore: - if (dev->config->destroy_avq) - dev->config->destroy_avq(dev); err: virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED); return ret; diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 7d82facafd75..c44d8ba00c02 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -46,12 +46,26 @@ bool vp_notify(struct virtqueue *vq) return true; } +/* Notify all slow path virtqueues on an interrupt. */ +static void vp_vring_slow_path_interrupt(int irq, + struct virtio_pci_device *vp_dev) +{ + struct virtio_pci_vq_info *info; + unsigned long flags; + + spin_lock_irqsave(&vp_dev->lock, flags); + list_for_each_entry(info, &vp_dev->slow_virtqueues, node) + vring_interrupt(irq, info->vq); + spin_unlock_irqrestore(&vp_dev->lock, flags); +} + /* Handle a configuration change: Tell driver if it wants to know. */ static irqreturn_t vp_config_changed(int irq, void *opaque) { struct virtio_pci_device *vp_dev = opaque; virtio_config_changed(&vp_dev->vdev); + vp_vring_slow_path_interrupt(irq, vp_dev); return IRQ_HANDLED; } @@ -125,6 +139,9 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, GFP_KERNEL)) goto error; + if (!per_vq_vectors) + desc = NULL; + if (desc) { flags |= PCI_IRQ_AFFINITY; desc->pre_vectors++; /* virtio config vector */ @@ -171,11 +188,17 @@ error: return err; } +static bool vp_is_slow_path_vector(u16 msix_vec) +{ + return msix_vec == VP_MSIX_CONFIG_VECTOR; +} + static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned int index, void (*callback)(struct virtqueue *vq), const char *name, bool ctx, - u16 msix_vec) + u16 msix_vec, + struct virtio_pci_vq_info **p_info) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_vq_info *info = kmalloc(sizeof *info, GFP_KERNEL); @@ -194,13 +217,16 @@ static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned int in info->vq = vq; if (callback) { spin_lock_irqsave(&vp_dev->lock, flags); - list_add(&info->node, &vp_dev->virtqueues); + if (!vp_is_slow_path_vector(msix_vec)) + list_add(&info->node, &vp_dev->virtqueues); + else + list_add(&info->node, &vp_dev->slow_virtqueues); spin_unlock_irqrestore(&vp_dev->lock, flags); } else { INIT_LIST_HEAD(&info->node); } - vp_dev->vqs[index] = info; + *p_info = info; return vq; out_info: @@ -236,13 +262,11 @@ void vp_del_vqs(struct virtio_device *vdev) int i; list_for_each_entry_safe(vq, n, &vdev->vqs, list) { - if (vp_dev->is_avq && vp_dev->is_avq(vdev, vq->index)) - continue; - if (vp_dev->per_vq_vectors) { int v = vp_dev->vqs[vq->index]->msix_vector; - if (v != VIRTIO_MSI_NO_VECTOR) { + if (v != VIRTIO_MSI_NO_VECTOR && + !vp_is_slow_path_vector(v)) { int irq = pci_irq_vector(vp_dev->pci_dev, v); irq_update_affinity_hint(irq, NULL); @@ -284,21 +308,85 @@ void vp_del_vqs(struct virtio_device *vdev) vp_dev->vqs = NULL; } +enum vp_vq_vector_policy { + VP_VQ_VECTOR_POLICY_EACH, + VP_VQ_VECTOR_POLICY_SHARED_SLOW, + VP_VQ_VECTOR_POLICY_SHARED, +}; + +static struct virtqueue * +vp_find_one_vq_msix(struct virtio_device *vdev, int queue_idx, + vq_callback_t *callback, const char *name, bool ctx, + bool slow_path, int *allocated_vectors, + enum vp_vq_vector_policy vector_policy, + struct virtio_pci_vq_info **p_info) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + struct virtqueue *vq; + u16 msix_vec; + int err; + + if (!callback) + msix_vec = VIRTIO_MSI_NO_VECTOR; + else if (vector_policy == VP_VQ_VECTOR_POLICY_EACH || + (vector_policy == VP_VQ_VECTOR_POLICY_SHARED_SLOW && + !slow_path)) + msix_vec = (*allocated_vectors)++; + else if (vector_policy != VP_VQ_VECTOR_POLICY_EACH && + slow_path) + msix_vec = VP_MSIX_CONFIG_VECTOR; + else + msix_vec = VP_MSIX_VQ_VECTOR; + vq = vp_setup_vq(vdev, queue_idx, callback, name, ctx, msix_vec, + p_info); + if (IS_ERR(vq)) + return vq; + + if (vector_policy == VP_VQ_VECTOR_POLICY_SHARED || + msix_vec == VIRTIO_MSI_NO_VECTOR || + vp_is_slow_path_vector(msix_vec)) + return vq; + + /* allocate per-vq irq if available and necessary */ + snprintf(vp_dev->msix_names[msix_vec], sizeof(*vp_dev->msix_names), + "%s-%s", dev_name(&vp_dev->vdev.dev), name); + err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec), + vring_interrupt, 0, + vp_dev->msix_names[msix_vec], vq); + if (err) { + vp_del_vq(vq); + return ERR_PTR(err); + } + + return vq; +} + static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], struct virtqueue_info vqs_info[], - bool per_vq_vectors, + enum vp_vq_vector_policy vector_policy, struct irq_affinity *desc) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); + struct virtio_pci_admin_vq *avq = &vp_dev->admin_vq; struct virtqueue_info *vqi; - u16 msix_vec; int i, err, nvectors, allocated_vectors, queue_idx = 0; + struct virtqueue *vq; + bool per_vq_vectors; + u16 avq_num = 0; vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL); if (!vp_dev->vqs) return -ENOMEM; + if (vp_dev->avq_index) { + err = vp_dev->avq_index(vdev, &avq->vq_index, &avq_num); + if (err) + goto error_find; + } + + per_vq_vectors = vector_policy != VP_VQ_VECTOR_POLICY_SHARED; + if (per_vq_vectors) { /* Best option: one for change interrupt, one per vq. */ nvectors = 1; @@ -307,13 +395,14 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs, if (vqi->name && vqi->callback) ++nvectors; } + if (avq_num && vector_policy == VP_VQ_VECTOR_POLICY_EACH) + ++nvectors; } else { /* Second best: one for change, shared for all vqs. */ nvectors = 2; } - err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors, - per_vq_vectors ? desc : NULL); + err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors, desc); if (err) goto error_find; @@ -325,37 +414,27 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs, vqs[i] = NULL; continue; } - - if (!vqi->callback) - msix_vec = VIRTIO_MSI_NO_VECTOR; - else if (vp_dev->per_vq_vectors) - msix_vec = allocated_vectors++; - else - msix_vec = VP_MSIX_VQ_VECTOR; - vqs[i] = vp_setup_vq(vdev, queue_idx++, vqi->callback, - vqi->name, vqi->ctx, msix_vec); + vqs[i] = vp_find_one_vq_msix(vdev, queue_idx++, vqi->callback, + vqi->name, vqi->ctx, false, + &allocated_vectors, vector_policy, + &vp_dev->vqs[i]); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); goto error_find; } + } - if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR) - continue; - - /* allocate per-vq irq if available and necessary */ - snprintf(vp_dev->msix_names[msix_vec], - sizeof *vp_dev->msix_names, - "%s-%s", - dev_name(&vp_dev->vdev.dev), vqi->name); - err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec), - vring_interrupt, 0, - vp_dev->msix_names[msix_vec], - vqs[i]); - if (err) { - vp_del_vq(vqs[i]); - goto error_find; - } + if (!avq_num) + return 0; + sprintf(avq->name, "avq.%u", avq->vq_index); + vq = vp_find_one_vq_msix(vdev, avq->vq_index, vp_modern_avq_done, + avq->name, false, true, &allocated_vectors, + vector_policy, &vp_dev->admin_vq.info); + if (IS_ERR(vq)) { + err = PTR_ERR(vq); + goto error_find; } + return 0; error_find: @@ -368,12 +447,21 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue_info vqs_info[]) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); + struct virtio_pci_admin_vq *avq = &vp_dev->admin_vq; int i, err, queue_idx = 0; + struct virtqueue *vq; + u16 avq_num = 0; vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL); if (!vp_dev->vqs) return -ENOMEM; + if (vp_dev->avq_index) { + err = vp_dev->avq_index(vdev, &avq->vq_index, &avq_num); + if (err) + goto out_del_vqs; + } + err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED, dev_name(&vdev->dev), vp_dev); if (err) @@ -390,13 +478,24 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs, } vqs[i] = vp_setup_vq(vdev, queue_idx++, vqi->callback, vqi->name, vqi->ctx, - VIRTIO_MSI_NO_VECTOR); + VIRTIO_MSI_NO_VECTOR, &vp_dev->vqs[i]); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); goto out_del_vqs; } } + if (!avq_num) + return 0; + sprintf(avq->name, "avq.%u", avq->vq_index); + vq = vp_setup_vq(vdev, queue_idx++, vp_modern_avq_done, avq->name, + false, VIRTIO_MSI_NO_VECTOR, + &vp_dev->admin_vq.info); + if (IS_ERR(vq)) { + err = PTR_ERR(vq); + goto out_del_vqs; + } + return 0; out_del_vqs: vp_del_vqs(vdev); @@ -411,11 +510,20 @@ int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs, int err; /* Try MSI-X with one vector per queue. */ - err = vp_find_vqs_msix(vdev, nvqs, vqs, vqs_info, true, desc); + err = vp_find_vqs_msix(vdev, nvqs, vqs, vqs_info, + VP_VQ_VECTOR_POLICY_EACH, desc); + if (!err) + return 0; + /* Fallback: MSI-X with one shared vector for config and + * slow path queues, one vector per queue for the rest. + */ + err = vp_find_vqs_msix(vdev, nvqs, vqs, vqs_info, + VP_VQ_VECTOR_POLICY_SHARED_SLOW, desc); if (!err) return 0; /* Fallback: MSI-X with one vector for config, one shared for queues. */ - err = vp_find_vqs_msix(vdev, nvqs, vqs, vqs_info, false, desc); + err = vp_find_vqs_msix(vdev, nvqs, vqs, vqs_info, + VP_VQ_VECTOR_POLICY_SHARED, desc); if (!err) return 0; /* Is there an interrupt? If not give up. */ @@ -466,7 +574,8 @@ const struct cpumask *vp_get_vq_affinity(struct virtio_device *vdev, int index) struct virtio_pci_device *vp_dev = to_vp_device(vdev); if (!vp_dev->per_vq_vectors || - vp_dev->vqs[index]->msix_vector == VIRTIO_MSI_NO_VECTOR) + vp_dev->vqs[index]->msix_vector == VIRTIO_MSI_NO_VECTOR || + vp_is_slow_path_vector(vp_dev->vqs[index]->msix_vector)) return NULL; return pci_irq_get_affinity(vp_dev->pci_dev, @@ -574,6 +683,7 @@ static int virtio_pci_probe(struct pci_dev *pci_dev, vp_dev->vdev.dev.release = virtio_pci_release_dev; vp_dev->pci_dev = pci_dev; INIT_LIST_HEAD(&vp_dev->virtqueues); + INIT_LIST_HEAD(&vp_dev->slow_virtqueues); spin_lock_init(&vp_dev->lock); /* enable the device */ diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index 3c4bb2d6163a..1d9c49947f52 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -35,7 +35,7 @@ struct virtio_pci_vq_info { /* the actual virtqueue */ struct virtqueue *vq; - /* the list node for the virtqueues list */ + /* the list node for the virtqueues or slow_virtqueues list */ struct list_head node; /* MSI-X vector (or none) */ @@ -44,9 +44,9 @@ struct virtio_pci_vq_info { struct virtio_pci_admin_vq { /* Virtqueue info associated with this admin queue. */ - struct virtio_pci_vq_info info; - /* serializing admin commands execution and virtqueue deletion */ - struct mutex cmd_lock; + struct virtio_pci_vq_info *info; + /* Protects virtqueue access. */ + spinlock_t lock; u64 supported_cmds; /* Name of the admin queue: avq.$vq_index. */ char name[10]; @@ -66,9 +66,12 @@ struct virtio_pci_device { /* Where to read and clear interrupt */ u8 __iomem *isr; - /* a list of queues so we can dispatch IRQs */ + /* Lists of queues and potentially slow path queues + * so we can dispatch IRQs. + */ spinlock_t lock; struct list_head virtqueues; + struct list_head slow_virtqueues; /* Array of all virtqueues reported in the * PCI common config num_queues field @@ -102,7 +105,7 @@ struct virtio_pci_device { void (*del_vq)(struct virtio_pci_vq_info *info); u16 (*config_vector)(struct virtio_pci_device *vp_dev, u16 vector); - bool (*is_avq)(struct virtio_device *vdev, unsigned int index); + int (*avq_index)(struct virtio_device *vdev, u16 *index, u16 *num); }; /* Constants for MSI-X */ @@ -175,6 +178,7 @@ struct virtio_device *virtio_pci_vf_get_pf_dev(struct pci_dev *pdev); #define VIRTIO_ADMIN_CMD_BITMAP 0 #endif +void vp_modern_avq_done(struct virtqueue *vq); int vp_modern_admin_cmd_exec(struct virtio_device *vdev, struct virtio_admin_cmd *cmd); diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 3b5b9499a53a..9193c30d640a 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -28,6 +28,21 @@ static u64 vp_get_features(struct virtio_device *vdev) return vp_modern_get_features(&vp_dev->mdev); } +static int vp_avq_index(struct virtio_device *vdev, u16 *index, u16 *num) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + + *num = 0; + if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ)) + return 0; + + *num = vp_modern_avq_num(&vp_dev->mdev); + if (!(*num)) + return -EINVAL; + *index = vp_modern_avq_index(&vp_dev->mdev); + return 0; +} + static bool vp_is_avq(struct virtio_device *vdev, unsigned int index) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); @@ -38,17 +53,35 @@ static bool vp_is_avq(struct virtio_device *vdev, unsigned int index) return index == vp_dev->admin_vq.vq_index; } +void vp_modern_avq_done(struct virtqueue *vq) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); + struct virtio_pci_admin_vq *admin_vq = &vp_dev->admin_vq; + struct virtio_admin_cmd *cmd; + unsigned long flags; + unsigned int len; + + spin_lock_irqsave(&admin_vq->lock, flags); + do { + virtqueue_disable_cb(vq); + while ((cmd = virtqueue_get_buf(vq, &len))) + complete(&cmd->completion); + } while (!virtqueue_enable_cb(vq)); + spin_unlock_irqrestore(&admin_vq->lock, flags); +} + static int virtqueue_exec_admin_cmd(struct virtio_pci_admin_vq *admin_vq, u16 opcode, struct scatterlist **sgs, unsigned int out_num, unsigned int in_num, - void *data) + struct virtio_admin_cmd *cmd) { struct virtqueue *vq; - int ret, len; + unsigned long flags; + int ret; - vq = admin_vq->info.vq; + vq = admin_vq->info->vq; if (!vq) return -EIO; @@ -57,21 +90,33 @@ static int virtqueue_exec_admin_cmd(struct virtio_pci_admin_vq *admin_vq, !((1ULL << opcode) & admin_vq->supported_cmds)) return -EOPNOTSUPP; - ret = virtqueue_add_sgs(vq, sgs, out_num, in_num, data, GFP_KERNEL); - if (ret < 0) - return -EIO; + init_completion(&cmd->completion); - if (unlikely(!virtqueue_kick(vq))) +again: + if (virtqueue_is_broken(vq)) return -EIO; - while (!virtqueue_get_buf(vq, &len) && - !virtqueue_is_broken(vq)) - cpu_relax(); + spin_lock_irqsave(&admin_vq->lock, flags); + ret = virtqueue_add_sgs(vq, sgs, out_num, in_num, cmd, GFP_KERNEL); + if (ret < 0) { + if (ret == -ENOSPC) { + spin_unlock_irqrestore(&admin_vq->lock, flags); + cpu_relax(); + goto again; + } + goto unlock_err; + } + if (!virtqueue_kick(vq)) + goto unlock_err; + spin_unlock_irqrestore(&admin_vq->lock, flags); - if (virtqueue_is_broken(vq)) - return -EIO; + wait_for_completion(&cmd->completion); - return 0; + return cmd->ret; + +unlock_err: + spin_unlock_irqrestore(&admin_vq->lock, flags); + return -EIO; } int vp_modern_admin_cmd_exec(struct virtio_device *vdev, @@ -122,12 +167,9 @@ int vp_modern_admin_cmd_exec(struct virtio_device *vdev, in_num++; } - mutex_lock(&vp_dev->admin_vq.cmd_lock); ret = virtqueue_exec_admin_cmd(&vp_dev->admin_vq, le16_to_cpu(cmd->opcode), - sgs, out_num, in_num, sgs); - mutex_unlock(&vp_dev->admin_vq.cmd_lock); - + sgs, out_num, in_num, cmd); if (ret) { dev_err(&vdev->dev, "Failed to execute command on admin vq: %d\n.", ret); @@ -188,25 +230,29 @@ end: static void vp_modern_avq_activate(struct virtio_device *vdev) { - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - struct virtio_pci_admin_vq *admin_vq = &vp_dev->admin_vq; - if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ)) return; - __virtqueue_unbreak(admin_vq->info.vq); virtio_pci_admin_cmd_list_init(vdev); } -static void vp_modern_avq_deactivate(struct virtio_device *vdev) +static void vp_modern_avq_cleanup(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - struct virtio_pci_admin_vq *admin_vq = &vp_dev->admin_vq; + struct virtio_admin_cmd *cmd; + struct virtqueue *vq; if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ)) return; - __virtqueue_break(admin_vq->info.vq); + vq = vp_dev->vqs[vp_dev->admin_vq.vq_index]->vq; + if (!vq) + return; + + while ((cmd = virtqueue_detach_unused_buf(vq))) { + cmd->ret = -EIO; + complete(&cmd->completion); + } } static void vp_transport_features(struct virtio_device *vdev, u64 features) @@ -403,7 +449,7 @@ static void vp_reset(struct virtio_device *vdev) while (vp_modern_get_status(mdev)) msleep(1); - vp_modern_avq_deactivate(vdev); + vp_modern_avq_cleanup(vdev); /* Flush pending VQ/configuration callbacks. */ vp_synchronize_vectors(vdev); @@ -552,8 +598,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, if (index >= vp_modern_get_num_queues(mdev) && !is_avq) return ERR_PTR(-EINVAL); - num = is_avq ? - VIRTIO_AVQ_SGS_MAX : vp_modern_get_queue_size(mdev, index); + num = vp_modern_get_queue_size(mdev, index); /* Check if queue is either not available or already active. */ if (!num || vp_modern_get_queue_enable(mdev, index)) return ERR_PTR(-ENOENT); @@ -580,12 +625,6 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, goto err; } - if (is_avq) { - mutex_lock(&vp_dev->admin_vq.cmd_lock); - vp_dev->admin_vq.info.vq = vq; - mutex_unlock(&vp_dev->admin_vq.cmd_lock); - } - return vq; err: @@ -620,12 +659,6 @@ static void del_vq(struct virtio_pci_vq_info *info) struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); struct virtio_pci_modern_device *mdev = &vp_dev->mdev; - if (vp_is_avq(&vp_dev->vdev, vq->index)) { - mutex_lock(&vp_dev->admin_vq.cmd_lock); - vp_dev->admin_vq.info.vq = NULL; - mutex_unlock(&vp_dev->admin_vq.cmd_lock); - } - if (vp_dev->msix_enabled) vp_modern_queue_vector(mdev, vq->index, VIRTIO_MSI_NO_VECTOR); @@ -735,45 +768,6 @@ static bool vp_get_shm_region(struct virtio_device *vdev, return true; } -static int vp_modern_create_avq(struct virtio_device *vdev) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - struct virtio_pci_admin_vq *avq; - struct virtqueue *vq; - u16 admin_q_num; - - if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ)) - return 0; - - admin_q_num = vp_modern_avq_num(&vp_dev->mdev); - if (!admin_q_num) - return -EINVAL; - - avq = &vp_dev->admin_vq; - avq->vq_index = vp_modern_avq_index(&vp_dev->mdev); - sprintf(avq->name, "avq.%u", avq->vq_index); - vq = vp_dev->setup_vq(vp_dev, &vp_dev->admin_vq.info, avq->vq_index, NULL, - avq->name, NULL, VIRTIO_MSI_NO_VECTOR); - if (IS_ERR(vq)) { - dev_err(&vdev->dev, "failed to setup admin virtqueue, err=%ld", - PTR_ERR(vq)); - return PTR_ERR(vq); - } - - vp_modern_set_queue_enable(&vp_dev->mdev, avq->info.vq->index, true); - return 0; -} - -static void vp_modern_destroy_avq(struct virtio_device *vdev) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - - if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ)) - return; - - vp_dev->del_vq(&vp_dev->admin_vq.info); -} - static const struct virtio_config_ops virtio_pci_config_nodev_ops = { .get = NULL, .set = NULL, @@ -792,8 +786,6 @@ static const struct virtio_config_ops virtio_pci_config_nodev_ops = { .get_shm_region = vp_get_shm_region, .disable_vq_and_reset = vp_modern_disable_vq_and_reset, .enable_vq_after_reset = vp_modern_enable_vq_after_reset, - .create_avq = vp_modern_create_avq, - .destroy_avq = vp_modern_destroy_avq, }; static const struct virtio_config_ops virtio_pci_config_ops = { @@ -814,8 +806,6 @@ static const struct virtio_config_ops virtio_pci_config_ops = { .get_shm_region = vp_get_shm_region, .disable_vq_and_reset = vp_modern_disable_vq_and_reset, .enable_vq_after_reset = vp_modern_enable_vq_after_reset, - .create_avq = vp_modern_create_avq, - .destroy_avq = vp_modern_destroy_avq, }; /* the PCI probing function */ @@ -839,11 +829,11 @@ int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev) vp_dev->config_vector = vp_config_vector; vp_dev->setup_vq = setup_vq; vp_dev->del_vq = del_vq; - vp_dev->is_avq = vp_is_avq; + vp_dev->avq_index = vp_avq_index; vp_dev->isr = mdev->isr; vp_dev->vdev.id = mdev->id; - mutex_init(&vp_dev->admin_vq.cmd_lock); + spin_lock_init(&vp_dev->admin_vq.lock); return 0; } @@ -851,6 +841,5 @@ void virtio_pci_modern_remove(struct virtio_pci_device *vp_dev) { struct virtio_pci_modern_device *mdev = &vp_dev->mdev; - mutex_destroy(&vp_dev->admin_vq.cmd_lock); vp_modern_remove(mdev); } diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 498442d0c216..2e49d978f504 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1223,8 +1223,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, block_group->space_info->total_bytes -= block_group->length; block_group->space_info->bytes_readonly -= (block_group->length - block_group->zone_unusable); - block_group->space_info->bytes_zone_unusable -= - block_group->zone_unusable; + btrfs_space_info_update_bytes_zone_unusable(fs_info, block_group->space_info, + -block_group->zone_unusable); block_group->space_info->disk_total -= block_group->length * factor; spin_unlock(&block_group->space_info->lock); @@ -1396,7 +1396,8 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force) if (btrfs_is_zoned(cache->fs_info)) { /* Migrate zone_unusable bytes to readonly */ sinfo->bytes_readonly += cache->zone_unusable; - sinfo->bytes_zone_unusable -= cache->zone_unusable; + btrfs_space_info_update_bytes_zone_unusable(cache->fs_info, sinfo, + -cache->zone_unusable); cache->zone_unusable = 0; } cache->ro++; @@ -3056,9 +3057,11 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group *cache) if (btrfs_is_zoned(cache->fs_info)) { /* Migrate zone_unusable bytes back */ cache->zone_unusable = - (cache->alloc_offset - cache->used) + + (cache->alloc_offset - cache->used - cache->pinned - + cache->reserved) + (cache->length - cache->zone_capacity); - sinfo->bytes_zone_unusable += cache->zone_unusable; + btrfs_space_info_update_bytes_zone_unusable(cache->fs_info, sinfo, + cache->zone_unusable); sinfo->bytes_readonly -= cache->zone_unusable; } num_bytes = cache->length - cache->reserved - diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c8568b1a61c4..75fa563e4cac 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -459,6 +459,7 @@ struct btrfs_file_private { void *filldir_buf; u64 last_index; struct extent_state *llseek_cached_state; + bool fsync_skip_inode_lock; }; static inline u32 BTRFS_LEAF_DATA_SIZE(const struct btrfs_fs_info *info) diff --git a/fs/btrfs/direct-io.c b/fs/btrfs/direct-io.c index f9fb2db6a1e4..67adbe9d294a 100644 --- a/fs/btrfs/direct-io.c +++ b/fs/btrfs/direct-io.c @@ -856,21 +856,37 @@ relock: * So here we disable page faults in the iov_iter and then retry if we * got -EFAULT, faulting in the pages before the retry. */ +again: from->nofault = true; dio = btrfs_dio_write(iocb, from, written); from->nofault = false; - /* - * iomap_dio_complete() will call btrfs_sync_file() if we have a dsync - * iocb, and that needs to lock the inode. So unlock it before calling - * iomap_dio_complete() to avoid a deadlock. - */ - btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); - - if (IS_ERR_OR_NULL(dio)) + if (IS_ERR_OR_NULL(dio)) { ret = PTR_ERR_OR_ZERO(dio); - else + } else { + struct btrfs_file_private stack_private = { 0 }; + struct btrfs_file_private *private; + const bool have_private = (file->private_data != NULL); + + if (!have_private) + file->private_data = &stack_private; + + /* + * If we have a synchronous write, we must make sure the fsync + * triggered by the iomap_dio_complete() call below doesn't + * deadlock on the inode lock - we are already holding it and we + * can't call it after unlocking because we may need to complete + * partial writes due to the input buffer (or parts of it) not + * being already faulted in. + */ + private = file->private_data; + private->fsync_skip_inode_lock = true; ret = iomap_dio_complete(dio); + private->fsync_skip_inode_lock = false; + + if (!have_private) + file->private_data = NULL; + } /* No increment (+=) because iomap returns a cumulative value. */ if (ret > 0) @@ -897,10 +913,12 @@ relock: } else { fault_in_iov_iter_readable(from, left); prev_left = left; - goto relock; + goto again; } } + btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); + /* * If 'ret' is -ENOTBLK or we have not written all data, then it means * we must fallback to buffered IO. diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d77498e7671c..ff9f0d41987e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2793,7 +2793,8 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info, readonly = true; } else if (btrfs_is_zoned(fs_info)) { /* Need reset before reusing in a zoned block group */ - space_info->bytes_zone_unusable += len; + btrfs_space_info_update_bytes_zone_unusable(fs_info, space_info, + len); readonly = true; } spin_unlock(&cache->lock); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 81558f90ee80..23b65dc73c00 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -664,7 +664,7 @@ static noinline int merge_extent_mapping(struct btrfs_inode *inode, start_diff = start - em->start; em->start = start; em->len = end - start; - if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE && !extent_map_is_compressed(em)) + if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE) em->offset += start_diff; return add_extent_mapping(inode, em, 0); } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 21381de906f6..9f10a9f23fcc 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1603,6 +1603,7 @@ static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx) */ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { + struct btrfs_file_private *private = file->private_data; struct dentry *dentry = file_dentry(file); struct btrfs_inode *inode = BTRFS_I(d_inode(dentry)); struct btrfs_root *root = inode->root; @@ -1612,6 +1613,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) int ret = 0, err; u64 len; bool full_sync; + const bool skip_ilock = (private ? private->fsync_skip_inode_lock : false); trace_btrfs_sync_file(file, datasync); @@ -1639,7 +1641,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (ret) goto out; - btrfs_inode_lock(inode, BTRFS_ILOCK_MMAP); + if (skip_ilock) + down_write(&inode->i_mmap_lock); + else + btrfs_inode_lock(inode, BTRFS_ILOCK_MMAP); atomic_inc(&root->log_batch); @@ -1663,7 +1668,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) */ ret = start_ordered_ops(inode, start, end); if (ret) { - btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); + if (skip_ilock) + up_write(&inode->i_mmap_lock); + else + btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); goto out; } @@ -1788,7 +1796,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) * file again, but that will end up using the synchronization * inside btrfs_sync_log to keep things safe. */ - btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); + if (skip_ilock) + up_write(&inode->i_mmap_lock); + else + btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); if (ret == BTRFS_NO_LOG_SYNC) { ret = btrfs_end_transaction(trans); diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 3f9b7507543a..f5996a43db24 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2723,8 +2723,10 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, * If the block group is read-only, we should account freed space into * bytes_readonly. */ - if (!block_group->ro) + if (!block_group->ro) { block_group->zone_unusable += to_unusable; + WARN_ON(block_group->zone_unusable > block_group->length); + } spin_unlock(&ctl->tree_lock); if (!used) { spin_lock(&block_group->lock); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 01eab6955647..19d05a4c5c33 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -714,8 +714,9 @@ out: return ret; } -static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 offset, - u64 end, +static noinline int cow_file_range_inline(struct btrfs_inode *inode, + struct page *locked_page, + u64 offset, u64 end, size_t compressed_size, int compress_type, struct folio *compressed_folio, @@ -739,7 +740,10 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 offset, return ret; } - extent_clear_unlock_delalloc(inode, offset, end, NULL, &cached, + if (ret == 0) + locked_page = NULL; + + extent_clear_unlock_delalloc(inode, offset, end, locked_page, &cached, clear_flags, PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK); @@ -1043,10 +1047,10 @@ again: * extent for the subpage case. */ if (total_in < actual_end) - ret = cow_file_range_inline(inode, start, end, 0, + ret = cow_file_range_inline(inode, NULL, start, end, 0, BTRFS_COMPRESS_NONE, NULL, false); else - ret = cow_file_range_inline(inode, start, end, total_compressed, + ret = cow_file_range_inline(inode, NULL, start, end, total_compressed, compress_type, folios[0], false); if (ret <= 0) { if (ret < 0) @@ -1359,7 +1363,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode, if (!no_inline) { /* lets try to make an inline extent */ - ret = cow_file_range_inline(inode, start, end, 0, + ret = cow_file_range_inline(inode, locked_page, start, end, 0, BTRFS_COMPRESS_NONE, NULL, false); if (ret <= 0) { /* @@ -5660,7 +5664,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) struct inode *inode; struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_root *sub_root = root; - struct btrfs_key location; + struct btrfs_key location = { 0 }; u8 di_type = 0; int ret = 0; diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 9ac94d3119e8..68e14fd48638 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -316,7 +316,7 @@ void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info, found->bytes_used += block_group->used; found->disk_used += block_group->used * factor; found->bytes_readonly += block_group->bytes_super; - found->bytes_zone_unusable += block_group->zone_unusable; + btrfs_space_info_update_bytes_zone_unusable(info, found, block_group->zone_unusable); if (block_group->length > 0) found->full = 0; btrfs_try_granting_tickets(info, found); @@ -583,8 +583,7 @@ again: spin_lock(&cache->lock); avail = cache->length - cache->used - cache->pinned - - cache->reserved - cache->delalloc_bytes - - cache->bytes_super - cache->zone_unusable; + cache->reserved - cache->bytes_super - cache->zone_unusable; btrfs_info(fs_info, "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %llu delalloc %llu super %llu zone_unusable (%llu bytes available) %s", cache->start, cache->length, cache->used, cache->pinned, diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h index 4db8a0267c16..88b44221ce97 100644 --- a/fs/btrfs/space-info.h +++ b/fs/btrfs/space-info.h @@ -249,6 +249,7 @@ btrfs_space_info_update_##name(struct btrfs_fs_info *fs_info, \ DECLARE_SPACE_INFO_UPDATE(bytes_may_use, "space_info"); DECLARE_SPACE_INFO_UPDATE(bytes_pinned, "pinned"); +DECLARE_SPACE_INFO_UPDATE(bytes_zone_unusable, "zone_unusable"); int btrfs_init_space_info(struct btrfs_fs_info *fs_info); void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info, diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c index ebec4ab361b8..56e61ac1cc64 100644 --- a/fs/btrfs/tests/extent-map-tests.c +++ b/fs/btrfs/tests/extent-map-tests.c @@ -900,6 +900,102 @@ out: return ret; } +/* + * Test a regression for compressed extent map adjustment when we attempt to + * add an extent map that is partially overlapped by another existing extent + * map. The resulting extent map offset was left unchanged despite having + * incremented its start offset. + */ +static int test_case_8(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode) +{ + struct extent_map_tree *em_tree = &inode->extent_tree; + struct extent_map *em; + int ret; + int ret2; + + em = alloc_extent_map(); + if (!em) { + test_std_err(TEST_ALLOC_EXTENT_MAP); + return -ENOMEM; + } + + /* Compressed extent for the file range [120K, 128K). */ + em->start = SZ_1K * 120; + em->len = SZ_8K; + em->disk_num_bytes = SZ_4K; + em->ram_bytes = SZ_8K; + em->flags |= EXTENT_FLAG_COMPRESS_ZLIB; + write_lock(&em_tree->lock); + ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len); + write_unlock(&em_tree->lock); + free_extent_map(em); + if (ret < 0) { + test_err("couldn't add extent map for range [120K, 128K)"); + goto out; + } + + em = alloc_extent_map(); + if (!em) { + test_std_err(TEST_ALLOC_EXTENT_MAP); + ret = -ENOMEM; + goto out; + } + + /* + * Compressed extent for the file range [108K, 144K), which overlaps + * with the [120K, 128K) we previously inserted. + */ + em->start = SZ_1K * 108; + em->len = SZ_1K * 36; + em->disk_num_bytes = SZ_4K; + em->ram_bytes = SZ_1K * 36; + em->flags |= EXTENT_FLAG_COMPRESS_ZLIB; + + /* + * Try to add the extent map but with a search range of [140K, 144K), + * this should succeed and adjust the extent map to the range + * [128K, 144K), with a length of 16K and an offset of 20K. + * + * This simulates a scenario where in the subvolume tree of an inode we + * have a compressed file extent item for the range [108K, 144K) and we + * have an overlapping compressed extent map for the range [120K, 128K), + * which was created by an encoded write, but its ordered extent was not + * yet completed, so the subvolume tree doesn't have yet the file extent + * item for that range - we only have the extent map in the inode's + * extent map tree. + */ + write_lock(&em_tree->lock); + ret = btrfs_add_extent_mapping(inode, &em, SZ_1K * 140, SZ_4K); + write_unlock(&em_tree->lock); + free_extent_map(em); + if (ret < 0) { + test_err("couldn't add extent map for range [108K, 144K)"); + goto out; + } + + if (em->start != SZ_128K) { + test_err("unexpected extent map start %llu (should be 128K)", em->start); + ret = -EINVAL; + goto out; + } + if (em->len != SZ_16K) { + test_err("unexpected extent map length %llu (should be 16K)", em->len); + ret = -EINVAL; + goto out; + } + if (em->offset != SZ_1K * 20) { + test_err("unexpected extent map offset %llu (should be 20K)", em->offset); + ret = -EINVAL; + goto out; + } +out: + ret2 = free_extent_map_tree(inode); + if (ret == 0) + ret = ret2; + + return ret; +} + struct rmap_test_vector { u64 raid_type; u64 physical_start; @@ -1078,6 +1174,9 @@ int btrfs_test_extent_map(void) ret = test_case_7(fs_info, BTRFS_I(inode)); if (ret) goto out; + ret = test_case_8(fs_info, BTRFS_I(inode)); + if (ret) + goto out; test_msg("running rmap tests"); for (i = 0; i < ARRAY_SIZE(rmap_tests); i++) { diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 89e009a4c3a3..a825fa598e3c 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -1289,6 +1289,19 @@ static void extent_err(const struct extent_buffer *eb, int slot, va_end(args); } +static bool is_valid_dref_root(u64 rootid) +{ + /* + * The following tree root objectids are allowed to have a data backref: + * - subvolume trees + * - data reloc tree + * - tree root + * For v1 space cache + */ + return is_fstree(rootid) || rootid == BTRFS_DATA_RELOC_TREE_OBJECTID || + rootid == BTRFS_ROOT_TREE_OBJECTID; +} + static int check_extent_item(struct extent_buffer *leaf, struct btrfs_key *key, int slot, struct btrfs_key *prev_key) @@ -1441,6 +1454,8 @@ static int check_extent_item(struct extent_buffer *leaf, struct btrfs_extent_data_ref *dref; struct btrfs_shared_data_ref *sref; u64 seq; + u64 dref_root; + u64 dref_objectid; u64 dref_offset; u64 inline_offset; u8 inline_type; @@ -1484,11 +1499,26 @@ static int check_extent_item(struct extent_buffer *leaf, */ case BTRFS_EXTENT_DATA_REF_KEY: dref = (struct btrfs_extent_data_ref *)(&iref->offset); + dref_root = btrfs_extent_data_ref_root(leaf, dref); + dref_objectid = btrfs_extent_data_ref_objectid(leaf, dref); dref_offset = btrfs_extent_data_ref_offset(leaf, dref); seq = hash_extent_data_ref( btrfs_extent_data_ref_root(leaf, dref), btrfs_extent_data_ref_objectid(leaf, dref), btrfs_extent_data_ref_offset(leaf, dref)); + if (unlikely(!is_valid_dref_root(dref_root))) { + extent_err(leaf, slot, + "invalid data ref root value %llu", + dref_root); + return -EUCLEAN; + } + if (unlikely(dref_objectid < BTRFS_FIRST_FREE_OBJECTID || + dref_objectid > BTRFS_LAST_FREE_OBJECTID)) { + extent_err(leaf, slot, + "invalid data ref objectid value %llu", + dref_root); + return -EUCLEAN; + } if (unlikely(!IS_ALIGNED(dref_offset, fs_info->sectorsize))) { extent_err(leaf, slot, @@ -1627,6 +1657,8 @@ static int check_extent_data_ref(struct extent_buffer *leaf, return -EUCLEAN; } for (; ptr < end; ptr += sizeof(*dref)) { + u64 root; + u64 objectid; u64 offset; /* @@ -1634,7 +1666,22 @@ static int check_extent_data_ref(struct extent_buffer *leaf, * overflow from the leaf due to hash collisions. */ dref = (struct btrfs_extent_data_ref *)ptr; + root = btrfs_extent_data_ref_root(leaf, dref); + objectid = btrfs_extent_data_ref_objectid(leaf, dref); offset = btrfs_extent_data_ref_offset(leaf, dref); + if (unlikely(!is_valid_dref_root(root))) { + extent_err(leaf, slot, + "invalid extent data backref root value %llu", + root); + return -EUCLEAN; + } + if (unlikely(objectid < BTRFS_FIRST_FREE_OBJECTID || + objectid > BTRFS_LAST_FREE_OBJECTID)) { + extent_err(leaf, slot, + "invalid extent data backref objectid value %llu", + root); + return -EUCLEAN; + } if (unlikely(!IS_ALIGNED(offset, leaf->fs_info->sectorsize))) { extent_err(leaf, slot, "invalid extent data backref offset, have %llu expect aligned to %u", diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index e98aa8219303..808c9c048276 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2016,6 +2016,8 @@ bool __ceph_should_report_size(struct ceph_inode_info *ci) * CHECK_CAPS_AUTHONLY - we should only check the auth cap * CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without * further delay. + * CHECK_CAPS_FLUSH_FORCE - we should flush any caps immediately, without + * further delay. */ void ceph_check_caps(struct ceph_inode_info *ci, int flags) { @@ -2097,7 +2099,7 @@ retry: } doutc(cl, "%p %llx.%llx file_want %s used %s dirty %s " - "flushing %s issued %s revoking %s retain %s %s%s%s\n", + "flushing %s issued %s revoking %s retain %s %s%s%s%s\n", inode, ceph_vinop(inode), ceph_cap_string(file_wanted), ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps), ceph_cap_string(ci->i_flushing_caps), @@ -2105,7 +2107,8 @@ retry: ceph_cap_string(retain), (flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "", (flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "", - (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : ""); + (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "", + (flags & CHECK_CAPS_FLUSH_FORCE) ? " FLUSH_FORCE" : ""); /* * If we no longer need to hold onto old our caps, and we may @@ -2180,6 +2183,11 @@ retry: queue_writeback = true; } + if (flags & CHECK_CAPS_FLUSH_FORCE) { + doutc(cl, "force to flush caps\n"); + goto ack; + } + if (cap == ci->i_auth_cap && (cap->issued & CEPH_CAP_FILE_WR)) { /* request larger max_size from MDS? */ @@ -3510,6 +3518,8 @@ static void handle_cap_grant(struct inode *inode, bool queue_invalidate = false; bool deleted_inode = false; bool fill_inline = false; + bool revoke_wait = false; + int flags = 0; /* * If there is at least one crypto block then we'll trust @@ -3705,16 +3715,18 @@ static void handle_cap_grant(struct inode *inode, ceph_cap_string(cap->issued), ceph_cap_string(newcaps), ceph_cap_string(revoking)); if (S_ISREG(inode->i_mode) && - (revoking & used & CEPH_CAP_FILE_BUFFER)) + (revoking & used & CEPH_CAP_FILE_BUFFER)) { writeback = true; /* initiate writeback; will delay ack */ - else if (queue_invalidate && + revoke_wait = true; + } else if (queue_invalidate && revoking == CEPH_CAP_FILE_CACHE && - (newcaps & CEPH_CAP_FILE_LAZYIO) == 0) - ; /* do nothing yet, invalidation will be queued */ - else if (cap == ci->i_auth_cap) + (newcaps & CEPH_CAP_FILE_LAZYIO) == 0) { + revoke_wait = true; /* do nothing yet, invalidation will be queued */ + } else if (cap == ci->i_auth_cap) { check_caps = 1; /* check auth cap only */ - else + } else { check_caps = 2; /* check all caps */ + } /* If there is new caps, try to wake up the waiters */ if (~cap->issued & newcaps) wake = true; @@ -3741,8 +3753,9 @@ static void handle_cap_grant(struct inode *inode, BUG_ON(cap->issued & ~cap->implemented); /* don't let check_caps skip sending a response to MDS for revoke msgs */ - if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) { + if (!revoke_wait && le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) { cap->mds_wanted = 0; + flags |= CHECK_CAPS_FLUSH_FORCE; if (cap == ci->i_auth_cap) check_caps = 1; /* check auth cap only */ else @@ -3798,9 +3811,9 @@ static void handle_cap_grant(struct inode *inode, mutex_unlock(&session->s_mutex); if (check_caps == 1) - ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL); + ceph_check_caps(ci, flags | CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL); else if (check_caps == 2) - ceph_check_caps(ci, CHECK_CAPS_NOINVAL); + ceph_check_caps(ci, flags | CHECK_CAPS_NOINVAL); } /* diff --git a/fs/ceph/super.h b/fs/ceph/super.h index b63b4cd9b5b6..6e817bf1337c 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -200,9 +200,10 @@ struct ceph_cap { struct list_head caps_item; }; -#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */ -#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */ -#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */ +#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */ +#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */ +#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */ +#define CHECK_CAPS_FLUSH_FORCE 8 /* force flush any caps */ struct ceph_cap_flush { u64 tid; diff --git a/fs/file.c b/fs/file.c index a3b72aa64f11..a11e59b5d602 100644 --- a/fs/file.c +++ b/fs/file.c @@ -1248,6 +1248,7 @@ __releases(&files->file_lock) * tables and this condition does not arise without those. */ fdt = files_fdtable(files); + fd = array_index_nospec(fd, fdt->max_fds); tofree = fdt->fd[fd]; if (!tofree && fd_is_open(fd, fdt)) goto Ebusy; diff --git a/fs/smb/client/cifsfs.h b/fs/smb/client/cifsfs.h index 62d5fee3e5eb..ca2bd204bcc5 100644 --- a/fs/smb/client/cifsfs.h +++ b/fs/smb/client/cifsfs.h @@ -147,6 +147,6 @@ extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ /* when changing internal version - update following two lines at same time */ -#define SMB3_PRODUCT_BUILD 49 -#define CIFS_VERSION "2.49" +#define SMB3_PRODUCT_BUILD 50 +#define CIFS_VERSION "2.50" #endif /* _CIFSFS_H */ diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 8e86fec7dcd2..f6d1f075987f 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1471,29 +1471,6 @@ struct cifs_io_parms { struct TCP_Server_Info *server; }; -struct cifs_aio_ctx { - struct kref refcount; - struct list_head list; - struct mutex aio_mutex; - struct completion done; - struct iov_iter iter; - struct kiocb *iocb; - struct cifsFileInfo *cfile; - struct bio_vec *bv; - loff_t pos; - unsigned int nr_pinned_pages; - ssize_t rc; - unsigned int len; - unsigned int total_len; - unsigned int bv_need_unpin; /* If ->bv[] needs unpinning */ - bool should_dirty; - /* - * Indicates if this aio_ctx is for direct_io, - * If yes, iter is a copy of the user passed iov_iter - */ - bool direct_io; -}; - struct cifs_io_request { struct netfs_io_request rreq; struct cifsFileInfo *cfile; @@ -2010,7 +1987,6 @@ require use of the stronger protocol */ * cifsFileInfo->file_info_lock cifsFileInfo->count cifs_new_fileinfo * ->invalidHandle initiate_cifs_search * ->oplock_break_cancelled - * cifs_aio_ctx->aio_mutex cifs_aio_ctx cifs_aio_ctx_alloc ****************************************************************************/ #ifdef DECLARE_GLOBALS_HERE diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index c15bb5ee7eb7..497bf3c447bc 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -619,8 +619,6 @@ int __cifs_calc_signature(struct smb_rqst *rqst, struct shash_desc *shash); enum securityEnum cifs_select_sectype(struct TCP_Server_Info *, enum securityEnum); -struct cifs_aio_ctx *cifs_aio_ctx_alloc(void); -void cifs_aio_ctx_release(struct kref *refcount); int cifs_alloc_hash(const char *name, struct shash_desc **sdesc); void cifs_free_hash(struct shash_desc **sdesc); diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 4a8aa1de9522..dd0afa23734c 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -1042,13 +1042,26 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data, } rc = -EOPNOTSUPP; - switch ((data->reparse.tag = tag)) { - case 0: /* SMB1 symlink */ + data->reparse.tag = tag; + if (!data->reparse.tag) { if (server->ops->query_symlink) { rc = server->ops->query_symlink(xid, tcon, cifs_sb, full_path, &data->symlink_target); } + if (rc == -EOPNOTSUPP) + data->reparse.tag = IO_REPARSE_TAG_INTERNAL; + } + + switch (data->reparse.tag) { + case 0: /* SMB1 symlink */ + break; + case IO_REPARSE_TAG_INTERNAL: + rc = 0; + if (le32_to_cpu(data->fi.Attributes) & ATTR_DIRECTORY) { + cifs_create_junction_fattr(fattr, sb); + goto out; + } break; case IO_REPARSE_TAG_MOUNT_POINT: cifs_create_junction_fattr(fattr, sb); diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c index 855ac5a62edf..44dbaf9929a4 100644 --- a/fs/smb/client/ioctl.c +++ b/fs/smb/client/ioctl.c @@ -170,7 +170,10 @@ static long smb_mnt_get_fsinfo(unsigned int xid, struct cifs_tcon *tcon, static int cifs_shutdown(struct super_block *sb, unsigned long arg) { struct cifs_sb_info *sbi = CIFS_SB(sb); + struct tcon_link *tlink; + struct cifs_tcon *tcon; __u32 flags; + int rc; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -178,14 +181,21 @@ static int cifs_shutdown(struct super_block *sb, unsigned long arg) if (get_user(flags, (__u32 __user *)arg)) return -EFAULT; - if (flags > CIFS_GOING_FLAGS_NOLOGFLUSH) - return -EINVAL; + tlink = cifs_sb_tlink(sbi); + if (IS_ERR(tlink)) + return PTR_ERR(tlink); + tcon = tlink_tcon(tlink); + + trace_smb3_shutdown_enter(flags, tcon->tid); + if (flags > CIFS_GOING_FLAGS_NOLOGFLUSH) { + rc = -EINVAL; + goto shutdown_out_err; + } if (cifs_forced_shutdown(sbi)) - return 0; + goto shutdown_good; cifs_dbg(VFS, "shut down requested (%d)", flags); -/* trace_cifs_shutdown(sb, flags);*/ /* * see: @@ -201,7 +211,8 @@ static int cifs_shutdown(struct super_block *sb, unsigned long arg) */ case CIFS_GOING_FLAGS_DEFAULT: cifs_dbg(FYI, "shutdown with default flag not supported\n"); - return -EINVAL; + rc = -EINVAL; + goto shutdown_out_err; /* * FLAGS_LOGFLUSH is easy since it asks to write out metadata (not * data) but metadata writes are not cached on the client, so can treat @@ -210,11 +221,18 @@ static int cifs_shutdown(struct super_block *sb, unsigned long arg) case CIFS_GOING_FLAGS_LOGFLUSH: case CIFS_GOING_FLAGS_NOLOGFLUSH: sbi->mnt_cifs_flags |= CIFS_MOUNT_SHUTDOWN; - return 0; + goto shutdown_good; default: - return -EINVAL; + rc = -EINVAL; + goto shutdown_out_err; } + +shutdown_good: + trace_smb3_shutdown_done(flags, tcon->tid); return 0; +shutdown_out_err: + trace_smb3_shutdown_err(rc, flags, tcon->tid); + return rc; } static int cifs_dump_full_key(struct cifs_tcon *tcon, struct smb3_full_key_debug_info __user *in) diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index 07c468ddb88a..b28ff62f1f15 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -995,60 +995,6 @@ parse_DFS_referrals_exit: return rc; } -struct cifs_aio_ctx * -cifs_aio_ctx_alloc(void) -{ - struct cifs_aio_ctx *ctx; - - /* - * Must use kzalloc to initialize ctx->bv to NULL and ctx->direct_io - * to false so that we know when we have to unreference pages within - * cifs_aio_ctx_release() - */ - ctx = kzalloc(sizeof(struct cifs_aio_ctx), GFP_KERNEL); - if (!ctx) - return NULL; - - INIT_LIST_HEAD(&ctx->list); - mutex_init(&ctx->aio_mutex); - init_completion(&ctx->done); - kref_init(&ctx->refcount); - return ctx; -} - -void -cifs_aio_ctx_release(struct kref *refcount) -{ - struct cifs_aio_ctx *ctx = container_of(refcount, - struct cifs_aio_ctx, refcount); - - cifsFileInfo_put(ctx->cfile); - - /* - * ctx->bv is only set if setup_aio_ctx_iter() was call successfuly - * which means that iov_iter_extract_pages() was a success and thus - * that we may have references or pins on pages that we need to - * release. - */ - if (ctx->bv) { - if (ctx->should_dirty || ctx->bv_need_unpin) { - unsigned int i; - - for (i = 0; i < ctx->nr_pinned_pages; i++) { - struct page *page = ctx->bv[i].bv_page; - - if (ctx->should_dirty) - set_page_dirty(page); - if (ctx->bv_need_unpin) - unpin_user_page(page); - } - } - kvfree(ctx->bv); - } - - kfree(ctx); -} - /** * cifs_alloc_hash - allocate hash and hash context together * @name: The name of the crypto hash algo diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c index a0ffbda90733..689d8a506d45 100644 --- a/fs/smb/client/reparse.c +++ b/fs/smb/client/reparse.c @@ -505,6 +505,10 @@ bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb, } switch (tag) { + case IO_REPARSE_TAG_INTERNAL: + if (!(fattr->cf_cifsattrs & ATTR_DIRECTORY)) + return false; + fallthrough; case IO_REPARSE_TAG_DFS: case IO_REPARSE_TAG_DFSR: case IO_REPARSE_TAG_MOUNT_POINT: diff --git a/fs/smb/client/reparse.h b/fs/smb/client/reparse.h index 6b55d1df9e2f..2c0644bc4e65 100644 --- a/fs/smb/client/reparse.h +++ b/fs/smb/client/reparse.h @@ -12,6 +12,12 @@ #include "fs_context.h" #include "cifsglob.h" +/* + * Used only by cifs.ko to ignore reparse points from files when client or + * server doesn't support FSCTL_GET_REPARSE_POINT. + */ +#define IO_REPARSE_TAG_INTERNAL ((__u32)~0U) + static inline dev_t reparse_nfs_mkdev(struct reparse_posix_data *buf) { u64 v = le64_to_cpu(*(__le64 *)buf->DataBuffer); @@ -78,10 +84,19 @@ static inline u32 reparse_mode_wsl_tag(mode_t mode) static inline bool reparse_inode_match(struct inode *inode, struct cifs_fattr *fattr) { + struct cifsInodeInfo *cinode = CIFS_I(inode); struct timespec64 ctime = inode_get_ctime(inode); - return (CIFS_I(inode)->cifsAttrs & ATTR_REPARSE) && - CIFS_I(inode)->reparse_tag == fattr->cf_cifstag && + /* + * Do not match reparse tags when client or server doesn't support + * FSCTL_GET_REPARSE_POINT. @fattr->cf_cifstag should contain correct + * reparse tag from query dir response but the client won't be able to + * read the reparse point data anyway. This spares us a revalidation. + */ + if (cinode->reparse_tag != IO_REPARSE_TAG_INTERNAL && + cinode->reparse_tag != fattr->cf_cifstag) + return false; + return (cinode->cifsAttrs & ATTR_REPARSE) && timespec64_equal(&ctime, &fattr->cf_ctime); } diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index 5c02a12251c8..9f5bc41433c1 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -930,6 +930,8 @@ int smb2_query_path_info(const unsigned int xid, switch (rc) { case 0: + rc = parse_create_response(data, cifs_sb, &out_iov[0]); + break; case -EOPNOTSUPP: /* * BB TODO: When support for special files added to Samba @@ -948,7 +950,8 @@ int smb2_query_path_info(const unsigned int xid, cmds[num_cmds++] = SMB2_OP_GET_REPARSE; oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, - FILE_READ_ATTRIBUTES | FILE_READ_EA, + FILE_READ_ATTRIBUTES | + FILE_READ_EA | SYNCHRONIZE, FILE_OPEN, create_options | OPEN_REPARSE_POINT, ACL_NO_MODE); cifs_get_readable_path(tcon, full_path, &cfile); @@ -1256,7 +1259,8 @@ int smb2_query_reparse_point(const unsigned int xid, cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path); cifs_get_readable_path(tcon, full_path, &cfile); - oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, FILE_READ_ATTRIBUTES, + oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, + FILE_READ_ATTRIBUTES | FILE_READ_EA | SYNCHRONIZE, FILE_OPEN, OPEN_REPARSE_POINT, ACL_NO_MODE); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, &oparms, &in_iov, diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h index 6b3bdfb97211..0f0c10c7ada7 100644 --- a/fs/smb/client/trace.h +++ b/fs/smb/client/trace.h @@ -1388,7 +1388,7 @@ DECLARE_EVENT_CLASS(smb3_ioctl_class, __entry->command = command; ), TP_printk("xid=%u fid=0x%llx ioctl cmd=0x%x", - __entry->xid, __entry->fid, __entry->command) + __entry->xid, __entry->fid, __entry->command) ) #define DEFINE_SMB3_IOCTL_EVENT(name) \ @@ -1400,9 +1400,58 @@ DEFINE_EVENT(smb3_ioctl_class, smb3_##name, \ DEFINE_SMB3_IOCTL_EVENT(ioctl); +DECLARE_EVENT_CLASS(smb3_shutdown_class, + TP_PROTO(__u32 flags, + __u32 tid), + TP_ARGS(flags, tid), + TP_STRUCT__entry( + __field(__u32, flags) + __field(__u32, tid) + ), + TP_fast_assign( + __entry->flags = flags; + __entry->tid = tid; + ), + TP_printk("flags=0x%x tid=0x%x", + __entry->flags, __entry->tid) +) + +#define DEFINE_SMB3_SHUTDOWN_EVENT(name) \ +DEFINE_EVENT(smb3_shutdown_class, smb3_##name, \ + TP_PROTO(__u32 flags, \ + __u32 tid), \ + TP_ARGS(flags, tid)) + +DEFINE_SMB3_SHUTDOWN_EVENT(shutdown_enter); +DEFINE_SMB3_SHUTDOWN_EVENT(shutdown_done); +DECLARE_EVENT_CLASS(smb3_shutdown_err_class, + TP_PROTO(int rc, + __u32 flags, + __u32 tid), + TP_ARGS(rc, flags, tid), + TP_STRUCT__entry( + __field(int, rc) + __field(__u32, flags) + __field(__u32, tid) + ), + TP_fast_assign( + __entry->rc = rc; + __entry->flags = flags; + __entry->tid = tid; + ), + TP_printk("rc=%d flags=0x%x tid=0x%x", + __entry->rc, __entry->flags, __entry->tid) +) +#define DEFINE_SMB3_SHUTDOWN_ERR_EVENT(name) \ +DEFINE_EVENT(smb3_shutdown_err_class, smb3_##name, \ + TP_PROTO(int rc, \ + __u32 flags, \ + __u32 tid), \ + TP_ARGS(rc, flags, tid)) +DEFINE_SMB3_SHUTDOWN_ERR_EVENT(shutdown_err); DECLARE_EVENT_CLASS(smb3_credit_class, TP_PROTO(__u64 currmid, diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h index cb035da3f990..fb05f44f6c75 100644 --- a/fs/xfs/libxfs/xfs_quota_defs.h +++ b/fs/xfs/libxfs/xfs_quota_defs.h @@ -56,7 +56,7 @@ typedef uint8_t xfs_dqtype_t; * And, of course, we also need to take into account the dquot log format item * used to describe each dquot. */ -#define XFS_DQUOT_LOGRES(mp) \ +#define XFS_DQUOT_LOGRES \ ((sizeof(struct xfs_dq_logformat) + sizeof(struct xfs_disk_dquot)) * 6) #define XFS_IS_QUOTA_ON(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT) diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 3dc8f785bf29..45aaf169806a 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -338,11 +338,11 @@ xfs_calc_write_reservation( blksz); t1 += adj; t3 += adj; - return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3); + return XFS_DQUOT_LOGRES + max3(t1, t2, t3); } t4 = xfs_calc_refcountbt_reservation(mp, 1); - return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3)); + return XFS_DQUOT_LOGRES + max(t4, max3(t1, t2, t3)); } unsigned int @@ -410,11 +410,11 @@ xfs_calc_itruncate_reservation( xfs_refcountbt_block_count(mp, 4), blksz); - return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3); + return XFS_DQUOT_LOGRES + max3(t1, t2, t3); } t4 = xfs_calc_refcountbt_reservation(mp, 2); - return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3)); + return XFS_DQUOT_LOGRES + max(t4, max3(t1, t2, t3)); } unsigned int @@ -466,7 +466,7 @@ STATIC uint xfs_calc_rename_reservation( struct xfs_mount *mp) { - unsigned int overhead = XFS_DQUOT_LOGRES(mp); + unsigned int overhead = XFS_DQUOT_LOGRES; struct xfs_trans_resv *resp = M_RES(mp); unsigned int t1, t2, t3 = 0; @@ -577,7 +577,7 @@ STATIC uint xfs_calc_link_reservation( struct xfs_mount *mp) { - unsigned int overhead = XFS_DQUOT_LOGRES(mp); + unsigned int overhead = XFS_DQUOT_LOGRES; struct xfs_trans_resv *resp = M_RES(mp); unsigned int t1, t2, t3 = 0; @@ -641,7 +641,7 @@ STATIC uint xfs_calc_remove_reservation( struct xfs_mount *mp) { - unsigned int overhead = XFS_DQUOT_LOGRES(mp); + unsigned int overhead = XFS_DQUOT_LOGRES; struct xfs_trans_resv *resp = M_RES(mp); unsigned int t1, t2, t3 = 0; @@ -729,7 +729,7 @@ xfs_calc_icreate_reservation( struct xfs_mount *mp) { struct xfs_trans_resv *resp = M_RES(mp); - unsigned int overhead = XFS_DQUOT_LOGRES(mp); + unsigned int overhead = XFS_DQUOT_LOGRES; unsigned int t1, t2, t3 = 0; t1 = xfs_calc_icreate_resv_alloc(mp); @@ -747,7 +747,7 @@ STATIC uint xfs_calc_create_tmpfile_reservation( struct xfs_mount *mp) { - uint res = XFS_DQUOT_LOGRES(mp); + uint res = XFS_DQUOT_LOGRES; res += xfs_calc_icreate_resv_alloc(mp); return res + xfs_calc_iunlink_add_reservation(mp); @@ -829,7 +829,7 @@ STATIC uint xfs_calc_ifree_reservation( struct xfs_mount *mp) { - return XFS_DQUOT_LOGRES(mp) + + return XFS_DQUOT_LOGRES + xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + xfs_calc_iunlink_remove_reservation(mp) + @@ -846,7 +846,7 @@ STATIC uint xfs_calc_ichange_reservation( struct xfs_mount *mp) { - return XFS_DQUOT_LOGRES(mp) + + return XFS_DQUOT_LOGRES + xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); @@ -955,7 +955,7 @@ STATIC uint xfs_calc_addafork_reservation( struct xfs_mount *mp) { - return XFS_DQUOT_LOGRES(mp) + + return XFS_DQUOT_LOGRES + xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + xfs_calc_buf_res(1, mp->m_dir_geo->blksize) + @@ -1003,7 +1003,7 @@ STATIC uint xfs_calc_attrsetm_reservation( struct xfs_mount *mp) { - return XFS_DQUOT_LOGRES(mp) + + return XFS_DQUOT_LOGRES + xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1)); @@ -1043,7 +1043,7 @@ STATIC uint xfs_calc_attrrm_reservation( struct xfs_mount *mp) { - return XFS_DQUOT_LOGRES(mp) + + return XFS_DQUOT_LOGRES + max((xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1)) + diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index 0dbc484b182f..2f98d90d7fd6 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -696,7 +696,7 @@ xrep_agfl_init_header( * step. */ xagb_bitmap_init(&af.used_extents); - af.agfl_bno = xfs_buf_to_agfl_bno(agfl_bp), + af.agfl_bno = xfs_buf_to_agfl_bno(agfl_bp); xagb_bitmap_walk(agfl_extents, xrep_agfl_fill, &af); error = xagb_bitmap_disunion(agfl_extents, &af.used_extents); if (error) diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c index 733c410a2279..91e7b51ce068 100644 --- a/fs/xfs/scrub/parent.c +++ b/fs/xfs/scrub/parent.c @@ -799,7 +799,7 @@ xchk_parent_pptr( } if (pp->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) - goto out_pp; + goto out_names; /* * Complain if the number of parent pointers doesn't match the link diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 92ef4cdc486e..c886d5d0eb02 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -959,18 +959,16 @@ TRACE_EVENT(xfile_create, TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned long, ino) - __array(char, pathname, 256) + __array(char, pathname, MAXNAMELEN) ), TP_fast_assign( - char pathname[257]; char *path; __entry->ino = file_inode(xf->file)->i_ino; - memset(pathname, 0, sizeof(pathname)); - path = file_path(xf->file, pathname, sizeof(pathname) - 1); + path = file_path(xf->file, __entry->pathname, MAXNAMELEN); if (IS_ERR(path)) - path = "(unknown)"; - strncpy(__entry->pathname, path, sizeof(__entry->pathname)); + strncpy(__entry->pathname, "(unknown)", + sizeof(__entry->pathname)); ), TP_printk("xfino 0x%lx path '%s'", __entry->ino, diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 5c947e5ce8b8..7db386304875 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -139,7 +139,7 @@ xfs_attr_shortform_list( sbp->name = sfe->nameval; sbp->namelen = sfe->namelen; /* These are bytes, and both on-disk, don't endian-flip */ - sbp->value = &sfe->nameval[sfe->namelen], + sbp->value = &sfe->nameval[sfe->namelen]; sbp->valuelen = sfe->valuelen; sbp->flags = sfe->flags; sbp->hash = xfs_attr_hashval(dp->i_mount, sfe->flags, diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 5646d300b286..180ce697305a 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -4715,20 +4715,18 @@ TRACE_EVENT(xmbuf_create, TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned long, ino) - __array(char, pathname, 256) + __array(char, pathname, MAXNAMELEN) ), TP_fast_assign( - char pathname[257]; char *path; struct file *file = btp->bt_file; __entry->dev = btp->bt_mount->m_super->s_dev; __entry->ino = file_inode(file)->i_ino; - memset(pathname, 0, sizeof(pathname)); - path = file_path(file, pathname, sizeof(pathname) - 1); + path = file_path(file, __entry->pathname, MAXNAMELEN); if (IS_ERR(path)) - path = "(unknown)"; - strncpy(__entry->pathname, path, sizeof(__entry->pathname)); + strncpy(__entry->pathname, "(unknown)", + sizeof(__entry->pathname)); ), TP_printk("dev %d:%d xmino 0x%lx path '%s'", MAJOR(__entry->dev), MINOR(__entry->dev), diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index ab3d22f662f2..eaf849260bd6 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -110,7 +110,24 @@ xfs_attr_change( args->whichfork = XFS_ATTR_FORK; xfs_attr_sethash(args); - return xfs_attr_set(args, op, args->attr_filter & XFS_ATTR_ROOT); + /* + * Some xattrs must be resistant to allocation failure at ENOSPC, e.g. + * creating an inode with ACLs or security attributes requires the + * allocation of the xattr holding that information to succeed. Hence + * we allow xattrs in the VFS TRUSTED, SYSTEM, POSIX_ACL and SECURITY + * (LSM xattr) namespaces to dip into the reserve block pool to allow + * manipulation of these xattrs when at ENOSPC. These VFS xattr + * namespaces translate to the XFS_ATTR_ROOT and XFS_ATTR_SECURE on-disk + * namespaces. + * + * For most of these cases, these special xattrs will fit in the inode + * itself and so consume no extra space or only require temporary extra + * space while an overwrite is being made. Hence the use of the reserved + * pool is largely to avoid the worst case reservation from preventing + * the xattr from being created at ENOSPC. + */ + return xfs_attr_set(args, op, + args->attr_filter & (XFS_ATTR_ROOT | XFS_ATTR_SECURE)); } diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index ad6afc5c4918..1ae44793132a 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -911,13 +911,12 @@ #define CON_INITCALL \ BOUNDED_SECTION_POST_LABEL(.con_initcall.init, __con_initcall, _start, _end) -#define RUNTIME_NAME(t,x) runtime_##t##_##x +#define NAMED_SECTION(name) \ + . = ALIGN(8); \ + name : AT(ADDR(name) - LOAD_OFFSET) \ + { BOUNDED_SECTION_PRE_LABEL(name, name, __start_, __stop_) } -#define RUNTIME_CONST(t,x) \ - . = ALIGN(8); \ - RUNTIME_NAME(t,x) : AT(ADDR(RUNTIME_NAME(t,x)) - LOAD_OFFSET) { \ - *(RUNTIME_NAME(t,x)); \ - } +#define RUNTIME_CONST(t,x) NAMED_SECTION(runtime_##t##_##x) /* Alignment must be consistent with (kunit_suite *) in include/kunit/test.h */ #define KUNIT_TABLE() \ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 2594553bb30b..2df665fa2964 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -297,6 +297,15 @@ static inline void *offset_to_ptr(const int *off) #define is_unsigned_type(type) (!is_signed_type(type)) /* + * Useful shorthand for "is this condition known at compile-time?" + * + * Note that the condition may involve non-constant values, + * but the compiler may know enough about the details of the + * values to determine that the condition is statically true. + */ +#define statically_true(x) (__builtin_constant_p(x) && (x)) + +/* * This is needed in functions which generate the stack canary, see * arch/x86/kernel/smpboot.c::start_secondary() for an example. */ diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 51ba681b915a..9316c39260e0 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -100,7 +100,6 @@ enum cpuhp_state { CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, - CPUHP_PROFILE_PREPARE, CPUHP_X2APIC_PREPARE, CPUHP_SMPCFD_PREPARE, CPUHP_RELAY_PREPARE, @@ -148,6 +147,7 @@ enum cpuhp_state { CPUHP_AP_IRQ_LOONGARCH_STARTING, CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, CPUHP_AP_IRQ_RISCV_IMSIC_STARTING, + CPUHP_AP_IRQ_RISCV_SBI_IPI_STARTING, CPUHP_AP_ARM_MVEBU_COHERENCY, CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, CPUHP_AP_PERF_X86_STARTING, diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 689e8be873a7..79a6b1a63027 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2414,7 +2414,7 @@ static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn } bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end, - unsigned long attrs); + unsigned long mask, unsigned long attrs); bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_arch_post_set_memory_attributes(struct kvm *kvm, @@ -2445,11 +2445,11 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm, } #endif /* CONFIG_KVM_PRIVATE_MEM */ -#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order); -bool kvm_arch_gmem_prepare_needed(struct kvm *kvm); #endif +#ifdef CONFIG_KVM_GENERIC_PRIVATE_MEM /** * kvm_gmem_populate() - Populate/prepare a GPA range with guest data * @@ -2476,8 +2476,9 @@ typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages, kvm_gmem_populate_cb post_populate, void *opaque); +#endif -#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end); #endif diff --git a/include/linux/minmax.h b/include/linux/minmax.h index e3e4353df983..98008dd92153 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -26,19 +26,63 @@ #define __typecheck(x, y) \ (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1))) -/* is_signed_type() isn't a constexpr for pointer types */ -#define __is_signed(x) \ - __builtin_choose_expr(__is_constexpr(is_signed_type(typeof(x))), \ - is_signed_type(typeof(x)), 0) +/* + * __sign_use for integer expressions: + * bit #0 set if ok for unsigned comparisons + * bit #1 set if ok for signed comparisons + * + * In particular, statically non-negative signed integer + * expressions are ok for both. + * + * NOTE! Unsigned types smaller than 'int' are implicitly + * converted to 'int' in expressions, and are accepted for + * signed conversions for now. This is debatable. + * + * Note that 'x' is the original expression, and 'ux' is + * the unique variable that contains the value. + * + * We use 'ux' for pure type checking, and 'x' for when + * we need to look at the value (but without evaluating + * it for side effects! Careful to only ever evaluate it + * with sizeof() or __builtin_constant_p() etc). + * + * Pointers end up being checked by the normal C type + * rules at the actual comparison, and these expressions + * only need to be careful to not cause warnings for + * pointer use. + */ +#define __signed_type_use(x,ux) (2+__is_nonneg(x,ux)) +#define __unsigned_type_use(x,ux) (1+2*(sizeof(ux)<4)) +#define __sign_use(x,ux) (is_signed_type(typeof(ux))? \ + __signed_type_use(x,ux):__unsigned_type_use(x,ux)) -/* True for a non-negative signed int constant */ -#define __is_noneg_int(x) \ - (__builtin_choose_expr(__is_constexpr(x) && __is_signed(x), x, -1) >= 0) +/* + * To avoid warnings about casting pointers to integers + * of different sizes, we need that special sign type. + * + * On 64-bit we can just always use 'long', since any + * integer or pointer type can just be cast to that. + * + * This does not work for 128-bit signed integers since + * the cast would truncate them, but we do not use s128 + * types in the kernel (we do use 'u128', but they will + * be handled by the !is_signed_type() case). + * + * NOTE! The cast is there only to avoid any warnings + * from when values that aren't signed integer types. + */ +#ifdef CONFIG_64BIT + #define __signed_type(ux) long +#else + #define __signed_type(ux) typeof(__builtin_choose_expr(sizeof(ux)>4,1LL,1L)) +#endif +#define __is_nonneg(x,ux) statically_true((__signed_type(ux))(x)>=0) + +#define __types_ok(x,y,ux,uy) \ + (__sign_use(x,ux) & __sign_use(y,uy)) -#define __types_ok(x, y, ux, uy) \ - (__is_signed(ux) == __is_signed(uy) || \ - __is_signed((ux) + 0) == __is_signed((uy) + 0) || \ - __is_noneg_int(x) || __is_noneg_int(y)) +#define __types_ok3(x,y,z,ux,uy,uz) \ + (__sign_use(x,ux) & __sign_use(y,uy) & __sign_use(z,uz)) #define __cmp_op_min < #define __cmp_op_max > @@ -53,8 +97,8 @@ #define __careful_cmp_once(op, x, y, ux, uy) ({ \ __auto_type ux = (x); __auto_type uy = (y); \ - static_assert(__types_ok(x, y, ux, uy), \ - #op "(" #x ", " #y ") signedness error, fix types or consider u" #op "() before " #op "_t()"); \ + BUILD_BUG_ON_MSG(!__types_ok(x,y,ux,uy), \ + #op"("#x", "#y") signedness error"); \ __cmp(op, ux, uy); }) #define __careful_cmp(op, x, y) \ @@ -70,8 +114,8 @@ static_assert(__builtin_choose_expr(__is_constexpr((lo) > (hi)), \ (lo) <= (hi), true), \ "clamp() low limit " #lo " greater than high limit " #hi); \ - static_assert(__types_ok(uval, lo, uval, ulo), "clamp() 'lo' signedness error"); \ - static_assert(__types_ok(uval, hi, uval, uhi), "clamp() 'hi' signedness error"); \ + BUILD_BUG_ON_MSG(!__types_ok3(val,lo,hi,uval,ulo,uhi), \ + "clamp("#val", "#lo", "#hi") signedness error"); \ __clamp(uval, ulo, uhi); }) #define __careful_clamp(val, lo, hi) \ @@ -108,13 +152,20 @@ #define umax(x, y) \ __careful_cmp(max, (x) + 0u + 0ul + 0ull, (y) + 0u + 0ul + 0ull) +#define __careful_op3(op, x, y, z, ux, uy, uz) ({ \ + __auto_type ux = (x); __auto_type uy = (y);__auto_type uz = (z);\ + BUILD_BUG_ON_MSG(!__types_ok3(x,y,z,ux,uy,uz), \ + #op"3("#x", "#y", "#z") signedness error"); \ + __cmp(op, ux, __cmp(op, uy, uz)); }) + /** * min3 - return minimum of three values * @x: first value * @y: second value * @z: third value */ -#define min3(x, y, z) min((typeof(x))min(x, y), z) +#define min3(x, y, z) \ + __careful_op3(min, x, y, z, __UNIQUE_ID(x_), __UNIQUE_ID(y_), __UNIQUE_ID(z_)) /** * max3 - return maximum of three values @@ -122,7 +173,8 @@ * @y: second value * @z: third value */ -#define max3(x, y, z) max((typeof(x))max(x, y), z) +#define max3(x, y, z) \ + __careful_op3(max, x, y, z, __UNIQUE_ID(x_), __UNIQUE_ID(y_), __UNIQUE_ID(z_)) /** * min_not_zero - return the minimum that is _not_ zero, unless both are zero diff --git a/include/linux/profile.h b/include/linux/profile.h index 2fb487f61d12..3f53cdb0c27c 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -10,7 +10,6 @@ #define CPU_PROFILING 1 #define SCHED_PROFILING 2 -#define SLEEP_PROFILING 3 #define KVM_PROFILING 4 struct proc_dir_entry; diff --git a/include/linux/virtio.h b/include/linux/virtio.h index ecc5cb7b8c91..4b16844c6bc2 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -10,6 +10,7 @@ #include <linux/mod_devicetable.h> #include <linux/gfp.h> #include <linux/dma-mapping.h> +#include <linux/completion.h> /** * struct virtqueue - a queue to register buffers for sending or receiving. @@ -109,6 +110,8 @@ struct virtio_admin_cmd { __le64 group_member_id; struct scatterlist *data_sg; struct scatterlist *result_sg; + struct completion completion; + int ret; }; /** diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index ab4b9a3fef6b..169c7d367fac 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -104,8 +104,6 @@ struct virtqueue_info { * Returns 0 on success or error status * If disable_vq_and_reset is set, then enable_vq_after_reset must also be * set. - * @create_avq: create admin virtqueue resource. - * @destroy_avq: destroy admin virtqueue resource. */ struct virtio_config_ops { void (*get)(struct virtio_device *vdev, unsigned offset, @@ -133,8 +131,6 @@ struct virtio_config_ops { struct virtio_shm_region *region, u8 id); int (*disable_vq_and_reset)(struct virtqueue *vq); int (*enable_vq_after_reset)(struct virtqueue *vq); - int (*create_avq)(struct virtio_device *vdev); - void (*destroy_avq)(struct virtio_device *vdev); }; /* If driver didn't advertise the feature, it will never appear. */ diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index d1d7825318c3..6c395a2600e8 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -56,7 +56,6 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, unsigned int thlen = 0; unsigned int p_off = 0; unsigned int ip_proto; - u64 ret, remainder, gso_size; if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { @@ -99,16 +98,6 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, u32 off = __virtio16_to_cpu(little_endian, hdr->csum_offset); u32 needed = start + max_t(u32, thlen, off + sizeof(__sum16)); - if (hdr->gso_size) { - gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size); - ret = div64_u64_rem(skb->len, gso_size, &remainder); - if (!(ret && (hdr->gso_size > needed) && - ((remainder > needed) || (remainder == 0)))) { - return -EINVAL; - } - skb_shinfo(skb)->tx_flags |= SKBFL_SHARED_FRAG; - } - if (!pskb_may_pull(skb, needed)) return -EINVAL; @@ -182,6 +171,11 @@ retry: if (gso_type != SKB_GSO_UDP_L4) return -EINVAL; break; + case SKB_GSO_TCPV4: + case SKB_GSO_TCPV6: + if (skb->csum_offset != offsetof(struct tcphdr, check)) + return -EINVAL; + break; } /* Kernel has a special handling for GSO_BY_FRAGS. */ diff --git a/include/sound/ump_convert.h b/include/sound/ump_convert.h index 28c364c63245..d099ae27f849 100644 --- a/include/sound/ump_convert.h +++ b/include/sound/ump_convert.h @@ -13,6 +13,7 @@ struct ump_cvt_to_ump_bank { unsigned char cc_nrpn_msb, cc_nrpn_lsb; unsigned char cc_data_msb, cc_data_lsb; unsigned char cc_bank_msb, cc_bank_lsb; + bool cc_data_msb_set, cc_data_lsb_set; }; /* context for converting from MIDI1 byte stream to UMP packet */ diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 246c0fbd582e..0a523023bdcc 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -2383,6 +2383,14 @@ DEFINE_EVENT(btrfs__space_info_update, update_bytes_pinned, TP_ARGS(fs_info, sinfo, old, diff) ); +DEFINE_EVENT(btrfs__space_info_update, update_bytes_zone_unusable, + + TP_PROTO(const struct btrfs_fs_info *fs_info, + const struct btrfs_space_info *sinfo, u64 old, s64 diff), + + TP_ARGS(fs_info, sinfo, old, diff) +); + DECLARE_EVENT_CLASS(btrfs_raid56_bio, TP_PROTO(const struct btrfs_raid_bio *rbio, diff --git a/include/trace/events/mptcp.h b/include/trace/events/mptcp.h index 09e72215b9f9..085b749cdd97 100644 --- a/include/trace/events/mptcp.h +++ b/include/trace/events/mptcp.h @@ -34,7 +34,7 @@ TRACE_EVENT(mptcp_subflow_get_send, struct sock *ssk; __entry->active = mptcp_subflow_active(subflow); - __entry->backup = subflow->backup; + __entry->backup = subflow->backup || subflow->request_bkup; if (subflow->tcp_sock && sk_fullsock(subflow->tcp_sock)) __entry->free = sk_stream_memory_free(subflow->tcp_sock); diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 985a262d0f9e..5bf6148cac2b 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -841,11 +841,8 @@ __SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules) #define __NR_mseal 462 __SYSCALL(__NR_mseal, sys_mseal) -#define __NR_uretprobe 463 -__SYSCALL(__NR_uretprobe, sys_uretprobe) - #undef __NR_syscalls -#define __NR_syscalls 464 +#define __NR_syscalls 463 /* * 32 bit systems traditionally used different diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index a43b14276bc3..cac0cdb9a916 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -1109,6 +1109,7 @@ struct ufs_hba { bool ext_iid_sup; bool scsi_host_added; bool mcq_sup; + bool lsdb_sup; bool mcq_enabled; struct ufshcd_res_info res[RES_MAX]; void __iomem *mcq_base; diff --git a/include/ufs/ufshci.h b/include/ufs/ufshci.h index 38fe97971a65..9917c7743d80 100644 --- a/include/ufs/ufshci.h +++ b/include/ufs/ufshci.h @@ -77,6 +77,7 @@ enum { MASK_OUT_OF_ORDER_DATA_DELIVERY_SUPPORT = 0x02000000, MASK_UIC_DME_TEST_MODE_SUPPORT = 0x04000000, MASK_CRYPTO_SUPPORT = 0x10000000, + MASK_LSDB_SUPPORT = 0x20000000, MASK_MCQ_SUPPORT = 0x40000000, }; diff --git a/init/Kconfig b/init/Kconfig index a465ea9525bd..37260d17267e 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1902,6 +1902,7 @@ config RUST depends on !MODVERSIONS depends on !GCC_PLUGINS depends on !RANDSTRUCT + depends on !SHADOW_CALL_STACK depends on !DEBUG_INFO_BTF || PAHOLE_HAS_LANG_EXCLUDE help Enables Rust support in the kernel. diff --git a/io_uring/napi.c b/io_uring/napi.c index 4fd6bb331e1e..a3dc3762008f 100644 --- a/io_uring/napi.c +++ b/io_uring/napi.c @@ -205,7 +205,6 @@ void io_napi_init(struct io_ring_ctx *ctx) void io_napi_free(struct io_ring_ctx *ctx) { struct io_napi_entry *e; - LIST_HEAD(napi_list); unsigned int i; spin_lock(&ctx->napi_lock); @@ -315,7 +314,6 @@ void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq) */ int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx) { - LIST_HEAD(napi_list); bool is_stale = false; if (!READ_ONCE(ctx->napi_busy_poll_dt)) diff --git a/io_uring/poll.c b/io_uring/poll.c index 0a8e02944689..1f63b60e85e7 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -347,6 +347,7 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts) v &= IO_POLL_REF_MASK; } while (atomic_sub_return(v, &req->poll_refs) & IO_POLL_REF_MASK); + io_napi_add(req); return IOU_POLL_NO_ACTION; } diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 4ad5ed8adf96..6dc76b590703 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -236,7 +236,7 @@ void static_key_disable_cpuslocked(struct static_key *key) } jump_label_lock(); - if (atomic_cmpxchg(&key->enabled, 1, 0)) + if (atomic_cmpxchg(&key->enabled, 1, 0) == 1) jump_label_update(key); jump_label_unlock(); } @@ -289,7 +289,7 @@ static void __static_key_slow_dec_cpuslocked(struct static_key *key) return; guard(mutex)(&jump_label_mutex); - if (atomic_cmpxchg(&key->enabled, 1, 0)) + if (atomic_cmpxchg(&key->enabled, 1, 0) == 1) jump_label_update(key); else WARN_ON_ONCE(!static_key_slow_try_dec(key)); diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 07fb5987b42b..1bab21b4718f 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -92,7 +92,14 @@ static ssize_t profiling_store(struct kobject *kobj, const char *buf, size_t count) { int ret; + static DEFINE_MUTEX(lock); + /* + * We need serialization, for profile_setup() initializes prof_on + * value and profile_init() must not reallocate prof_buffer after + * once allocated. + */ + guard(mutex)(&lock); if (prof_on) return -EEXIST; /* diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index f5a36e67b593..ac2e22502741 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -357,7 +357,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev) static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node) { struct pv_node *pn = (struct pv_node *)node; - enum vcpu_state old = vcpu_halted; + u8 old = vcpu_halted; /* * If the vCPU is indeed halted, advance its state to match that of * pv_wait_node(). If OTOH this fails, the vCPU was running and will diff --git a/kernel/profile.c b/kernel/profile.c index 2b775cc5c28f..1fcf1adcf4eb 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -47,30 +47,14 @@ static unsigned short int prof_shift; int prof_on __read_mostly; EXPORT_SYMBOL_GPL(prof_on); -static cpumask_var_t prof_cpu_mask; -#if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS) -static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits); -static DEFINE_PER_CPU(int, cpu_profile_flip); -static DEFINE_MUTEX(profile_flip_mutex); -#endif /* CONFIG_SMP */ - int profile_setup(char *str) { static const char schedstr[] = "schedule"; - static const char sleepstr[] = "sleep"; static const char kvmstr[] = "kvm"; const char *select = NULL; int par; - if (!strncmp(str, sleepstr, strlen(sleepstr))) { -#ifdef CONFIG_SCHEDSTATS - force_schedstat_enabled(); - prof_on = SLEEP_PROFILING; - select = sleepstr; -#else - pr_warn("kernel sleep profiling requires CONFIG_SCHEDSTATS\n"); -#endif /* CONFIG_SCHEDSTATS */ - } else if (!strncmp(str, schedstr, strlen(schedstr))) { + if (!strncmp(str, schedstr, strlen(schedstr))) { prof_on = SCHED_PROFILING; select = schedstr; } else if (!strncmp(str, kvmstr, strlen(kvmstr))) { @@ -114,11 +98,6 @@ int __ref profile_init(void) buffer_bytes = prof_len*sizeof(atomic_t); - if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL)) - return -ENOMEM; - - cpumask_copy(prof_cpu_mask, cpu_possible_mask); - prof_buffer = kzalloc(buffer_bytes, GFP_KERNEL|__GFP_NOWARN); if (prof_buffer) return 0; @@ -132,195 +111,16 @@ int __ref profile_init(void) if (prof_buffer) return 0; - free_cpumask_var(prof_cpu_mask); return -ENOMEM; } -#if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS) -/* - * Each cpu has a pair of open-addressed hashtables for pending - * profile hits. read_profile() IPI's all cpus to request them - * to flip buffers and flushes their contents to prof_buffer itself. - * Flip requests are serialized by the profile_flip_mutex. The sole - * use of having a second hashtable is for avoiding cacheline - * contention that would otherwise happen during flushes of pending - * profile hits required for the accuracy of reported profile hits - * and so resurrect the interrupt livelock issue. - * - * The open-addressed hashtables are indexed by profile buffer slot - * and hold the number of pending hits to that profile buffer slot on - * a cpu in an entry. When the hashtable overflows, all pending hits - * are accounted to their corresponding profile buffer slots with - * atomic_add() and the hashtable emptied. As numerous pending hits - * may be accounted to a profile buffer slot in a hashtable entry, - * this amortizes a number of atomic profile buffer increments likely - * to be far larger than the number of entries in the hashtable, - * particularly given that the number of distinct profile buffer - * positions to which hits are accounted during short intervals (e.g. - * several seconds) is usually very small. Exclusion from buffer - * flipping is provided by interrupt disablement (note that for - * SCHED_PROFILING or SLEEP_PROFILING profile_hit() may be called from - * process context). - * The hash function is meant to be lightweight as opposed to strong, - * and was vaguely inspired by ppc64 firmware-supported inverted - * pagetable hash functions, but uses a full hashtable full of finite - * collision chains, not just pairs of them. - * - * -- nyc - */ -static void __profile_flip_buffers(void *unused) -{ - int cpu = smp_processor_id(); - - per_cpu(cpu_profile_flip, cpu) = !per_cpu(cpu_profile_flip, cpu); -} - -static void profile_flip_buffers(void) -{ - int i, j, cpu; - - mutex_lock(&profile_flip_mutex); - j = per_cpu(cpu_profile_flip, get_cpu()); - put_cpu(); - on_each_cpu(__profile_flip_buffers, NULL, 1); - for_each_online_cpu(cpu) { - struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[j]; - for (i = 0; i < NR_PROFILE_HIT; ++i) { - if (!hits[i].hits) { - if (hits[i].pc) - hits[i].pc = 0; - continue; - } - atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]); - hits[i].hits = hits[i].pc = 0; - } - } - mutex_unlock(&profile_flip_mutex); -} - -static void profile_discard_flip_buffers(void) -{ - int i, cpu; - - mutex_lock(&profile_flip_mutex); - i = per_cpu(cpu_profile_flip, get_cpu()); - put_cpu(); - on_each_cpu(__profile_flip_buffers, NULL, 1); - for_each_online_cpu(cpu) { - struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[i]; - memset(hits, 0, NR_PROFILE_HIT*sizeof(struct profile_hit)); - } - mutex_unlock(&profile_flip_mutex); -} - -static void do_profile_hits(int type, void *__pc, unsigned int nr_hits) -{ - unsigned long primary, secondary, flags, pc = (unsigned long)__pc; - int i, j, cpu; - struct profile_hit *hits; - - pc = min((pc - (unsigned long)_stext) >> prof_shift, prof_len - 1); - i = primary = (pc & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT; - secondary = (~(pc << 1) & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT; - cpu = get_cpu(); - hits = per_cpu(cpu_profile_hits, cpu)[per_cpu(cpu_profile_flip, cpu)]; - if (!hits) { - put_cpu(); - return; - } - /* - * We buffer the global profiler buffer into a per-CPU - * queue and thus reduce the number of global (and possibly - * NUMA-alien) accesses. The write-queue is self-coalescing: - */ - local_irq_save(flags); - do { - for (j = 0; j < PROFILE_GRPSZ; ++j) { - if (hits[i + j].pc == pc) { - hits[i + j].hits += nr_hits; - goto out; - } else if (!hits[i + j].hits) { - hits[i + j].pc = pc; - hits[i + j].hits = nr_hits; - goto out; - } - } - i = (i + secondary) & (NR_PROFILE_HIT - 1); - } while (i != primary); - - /* - * Add the current hit(s) and flush the write-queue out - * to the global buffer: - */ - atomic_add(nr_hits, &prof_buffer[pc]); - for (i = 0; i < NR_PROFILE_HIT; ++i) { - atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]); - hits[i].pc = hits[i].hits = 0; - } -out: - local_irq_restore(flags); - put_cpu(); -} - -static int profile_dead_cpu(unsigned int cpu) -{ - struct page *page; - int i; - - if (cpumask_available(prof_cpu_mask)) - cpumask_clear_cpu(cpu, prof_cpu_mask); - - for (i = 0; i < 2; i++) { - if (per_cpu(cpu_profile_hits, cpu)[i]) { - page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[i]); - per_cpu(cpu_profile_hits, cpu)[i] = NULL; - __free_page(page); - } - } - return 0; -} - -static int profile_prepare_cpu(unsigned int cpu) -{ - int i, node = cpu_to_mem(cpu); - struct page *page; - - per_cpu(cpu_profile_flip, cpu) = 0; - - for (i = 0; i < 2; i++) { - if (per_cpu(cpu_profile_hits, cpu)[i]) - continue; - - page = __alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); - if (!page) { - profile_dead_cpu(cpu); - return -ENOMEM; - } - per_cpu(cpu_profile_hits, cpu)[i] = page_address(page); - - } - return 0; -} - -static int profile_online_cpu(unsigned int cpu) -{ - if (cpumask_available(prof_cpu_mask)) - cpumask_set_cpu(cpu, prof_cpu_mask); - - return 0; -} - -#else /* !CONFIG_SMP */ -#define profile_flip_buffers() do { } while (0) -#define profile_discard_flip_buffers() do { } while (0) - static void do_profile_hits(int type, void *__pc, unsigned int nr_hits) { unsigned long pc; pc = ((unsigned long)__pc - (unsigned long)_stext) >> prof_shift; - atomic_add(nr_hits, &prof_buffer[min(pc, prof_len - 1)]); + if (pc < prof_len) + atomic_add(nr_hits, &prof_buffer[pc]); } -#endif /* !CONFIG_SMP */ void profile_hits(int type, void *__pc, unsigned int nr_hits) { @@ -334,8 +134,8 @@ void profile_tick(int type) { struct pt_regs *regs = get_irq_regs(); - if (!user_mode(regs) && cpumask_available(prof_cpu_mask) && - cpumask_test_cpu(smp_processor_id(), prof_cpu_mask)) + /* This is the old kernel-only legacy profiling */ + if (!user_mode(regs)) profile_hit(type, (void *)profile_pc(regs)); } @@ -358,7 +158,6 @@ read_profile(struct file *file, char __user *buf, size_t count, loff_t *ppos) char *pnt; unsigned long sample_step = 1UL << prof_shift; - profile_flip_buffers(); if (p >= (prof_len+1)*sizeof(unsigned int)) return 0; if (count > (prof_len+1)*sizeof(unsigned int) - p) @@ -404,7 +203,6 @@ static ssize_t write_profile(struct file *file, const char __user *buf, return -EINVAL; } #endif - profile_discard_flip_buffers(); memset(prof_buffer, 0, prof_len * sizeof(atomic_t)); return count; } @@ -418,40 +216,14 @@ static const struct proc_ops profile_proc_ops = { int __ref create_proc_profile(void) { struct proc_dir_entry *entry; -#ifdef CONFIG_SMP - enum cpuhp_state online_state; -#endif - int err = 0; if (!prof_on) return 0; -#ifdef CONFIG_SMP - err = cpuhp_setup_state(CPUHP_PROFILE_PREPARE, "PROFILE_PREPARE", - profile_prepare_cpu, profile_dead_cpu); - if (err) - return err; - - err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "AP_PROFILE_ONLINE", - profile_online_cpu, NULL); - if (err < 0) - goto err_state_prep; - online_state = err; - err = 0; -#endif entry = proc_create("profile", S_IWUSR | S_IRUGO, NULL, &profile_proc_ops); - if (!entry) - goto err_state_onl; - proc_set_size(entry, (1 + prof_len) * sizeof(atomic_t)); - - return err; -err_state_onl: -#ifdef CONFIG_SMP - cpuhp_remove_state(online_state); -err_state_prep: - cpuhp_remove_state(CPUHP_PROFILE_PREPARE); -#endif + if (entry) + proc_set_size(entry, (1 + prof_len) * sizeof(atomic_t)); return err; } subsys_initcall(create_proc_profile); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index a9f655025607..f3951e4a55e5 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7845,6 +7845,30 @@ void set_rq_offline(struct rq *rq) } } +static inline void sched_set_rq_online(struct rq *rq, int cpu) +{ + struct rq_flags rf; + + rq_lock_irqsave(rq, &rf); + if (rq->rd) { + BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); + set_rq_online(rq); + } + rq_unlock_irqrestore(rq, &rf); +} + +static inline void sched_set_rq_offline(struct rq *rq, int cpu) +{ + struct rq_flags rf; + + rq_lock_irqsave(rq, &rf); + if (rq->rd) { + BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); + set_rq_offline(rq); + } + rq_unlock_irqrestore(rq, &rf); +} + /* * used to mark begin/end of suspend/resume: */ @@ -7895,10 +7919,25 @@ static int cpuset_cpu_inactive(unsigned int cpu) return 0; } +static inline void sched_smt_present_inc(int cpu) +{ +#ifdef CONFIG_SCHED_SMT + if (cpumask_weight(cpu_smt_mask(cpu)) == 2) + static_branch_inc_cpuslocked(&sched_smt_present); +#endif +} + +static inline void sched_smt_present_dec(int cpu) +{ +#ifdef CONFIG_SCHED_SMT + if (cpumask_weight(cpu_smt_mask(cpu)) == 2) + static_branch_dec_cpuslocked(&sched_smt_present); +#endif +} + int sched_cpu_activate(unsigned int cpu) { struct rq *rq = cpu_rq(cpu); - struct rq_flags rf; /* * Clear the balance_push callback and prepare to schedule @@ -7906,13 +7945,10 @@ int sched_cpu_activate(unsigned int cpu) */ balance_push_set(cpu, false); -#ifdef CONFIG_SCHED_SMT /* * When going up, increment the number of cores with SMT present. */ - if (cpumask_weight(cpu_smt_mask(cpu)) == 2) - static_branch_inc_cpuslocked(&sched_smt_present); -#endif + sched_smt_present_inc(cpu); set_cpu_active(cpu, true); if (sched_smp_initialized) { @@ -7930,12 +7966,7 @@ int sched_cpu_activate(unsigned int cpu) * 2) At runtime, if cpuset_cpu_active() fails to rebuild the * domains. */ - rq_lock_irqsave(rq, &rf); - if (rq->rd) { - BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); - set_rq_online(rq); - } - rq_unlock_irqrestore(rq, &rf); + sched_set_rq_online(rq, cpu); return 0; } @@ -7943,7 +7974,6 @@ int sched_cpu_activate(unsigned int cpu) int sched_cpu_deactivate(unsigned int cpu) { struct rq *rq = cpu_rq(cpu); - struct rq_flags rf; int ret; /* @@ -7974,20 +8004,14 @@ int sched_cpu_deactivate(unsigned int cpu) */ synchronize_rcu(); - rq_lock_irqsave(rq, &rf); - if (rq->rd) { - BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); - set_rq_offline(rq); - } - rq_unlock_irqrestore(rq, &rf); + sched_set_rq_offline(rq, cpu); -#ifdef CONFIG_SCHED_SMT /* * When going down, decrement the number of cores with SMT present. */ - if (cpumask_weight(cpu_smt_mask(cpu)) == 2) - static_branch_dec_cpuslocked(&sched_smt_present); + sched_smt_present_dec(cpu); +#ifdef CONFIG_SCHED_SMT sched_core_cpu_deactivate(cpu); #endif @@ -7997,6 +8021,8 @@ int sched_cpu_deactivate(unsigned int cpu) sched_update_numa(cpu, false); ret = cpuset_cpu_inactive(cpu); if (ret) { + sched_smt_present_inc(cpu); + sched_set_rq_online(rq, cpu); balance_push_set(cpu, false); set_cpu_active(cpu, true); sched_update_numa(cpu, true); diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index a5e00293ae43..0bed0fa1acd9 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -582,6 +582,12 @@ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, } stime = mul_u64_u64_div_u64(stime, rtime, stime + utime); + /* + * Because mul_u64_u64_div_u64() can approximate on some + * achitectures; enforce the constraint that: a*b/(b+c) <= a. + */ + if (unlikely(stime > rtime)) + stime = rtime; update: /* diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c index 78e48f5426ee..eb0cdcd4d921 100644 --- a/kernel/sched/stats.c +++ b/kernel/sched/stats.c @@ -92,16 +92,6 @@ void __update_stats_enqueue_sleeper(struct rq *rq, struct task_struct *p, trace_sched_stat_blocked(p, delta); - /* - * Blocking time is in units of nanosecs, so shift by - * 20 to get a milliseconds-range estimation of the - * amount of time that the task spent sleeping: - */ - if (unlikely(prof_on == SLEEP_PROFILING)) { - profile_hits(SLEEP_PROFILING, - (void *)get_wchan(p), - delta >> 20); - } account_scheduler_latency(p, delta >> 10, 0); } } diff --git a/kernel/task_work.c b/kernel/task_work.c index 5c2daa7ad3f9..5d14d639ac71 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -6,12 +6,14 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */ +#ifdef CONFIG_IRQ_WORK static void task_work_set_notify_irq(struct irq_work *entry) { test_and_set_tsk_thread_flag(current, TIF_NOTIFY_RESUME); } static DEFINE_PER_CPU(struct irq_work, irq_work_NMI_resume) = IRQ_WORK_INIT_HARD(task_work_set_notify_irq); +#endif /** * task_work_add - ask the @task to execute @work->func() @@ -57,6 +59,8 @@ int task_work_add(struct task_struct *task, struct callback_head *work, if (notify == TWA_NMI_CURRENT) { if (WARN_ON_ONCE(task != current)) return -EINVAL; + if (!IS_ENABLED(CONFIG_IRQ_WORK)) + return -EINVAL; } else { /* record the work call stack in order to print it in KASAN reports */ kasan_record_aux_stack(work); @@ -81,9 +85,11 @@ int task_work_add(struct task_struct *task, struct callback_head *work, case TWA_SIGNAL_NO_IPI: __set_notify_signal(task); break; +#ifdef CONFIG_IRQ_WORK case TWA_NMI_CURRENT: irq_work_queue(this_cpu_ptr(&irq_work_NMI_resume)); break; +#endif default: WARN_ON_ONCE(1); break; diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index d25ba49e313c..d0538a75f4c6 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -246,7 +246,7 @@ static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow, wd_delay = cycles_to_nsec_safe(watchdog, *wdnow, wd_end); if (wd_delay <= WATCHDOG_MAX_SKEW) { - if (nretries > 1 || nretries >= max_retries) { + if (nretries > 1 && nretries >= max_retries) { pr_warn("timekeeping watchdog on CPU%d: %s retried %d times before success\n", smp_processor_id(), watchdog->name, nretries); } diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index b4843099a8da..ed58eebb4e8f 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -1141,7 +1141,6 @@ void tick_broadcast_switch_to_oneshot(void) #ifdef CONFIG_HOTPLUG_CPU void hotplug_cpu__broadcast_tick_pull(int deadcpu) { - struct tick_device *td = this_cpu_ptr(&tick_cpu_device); struct clock_event_device *bc; unsigned long flags; @@ -1167,6 +1166,8 @@ void hotplug_cpu__broadcast_tick_pull(int deadcpu) * device to avoid the starvation. */ if (tick_check_broadcast_expired()) { + struct tick_device *td = this_cpu_ptr(&tick_cpu_device); + cpumask_clear_cpu(smp_processor_id(), tick_broadcast_force_mask); tick_program_event(td->evtdev->next_event, 1); } diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 8a4ebd93adfc..06da8ac13dca 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -119,13 +119,6 @@ void hci_discovery_set_state(struct hci_dev *hdev, int state) case DISCOVERY_STARTING: break; case DISCOVERY_FINDING: - /* If discovery was not started then it was initiated by the - * MGMT interface so no MGMT event shall be generated either - */ - if (old_state != DISCOVERY_STARTING) { - hdev->discovery.state = old_state; - return; - } mgmt_discovering(hdev, 1); break; case DISCOVERY_RESOLVING: diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index dce8035ca799..d0c118c47f6c 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1721,9 +1721,10 @@ static void le_set_scan_enable_complete(struct hci_dev *hdev, u8 enable) switch (enable) { case LE_SCAN_ENABLE: hci_dev_set_flag(hdev, HCI_LE_SCAN); - if (hdev->le_scan_type == LE_SCAN_ACTIVE) + if (hdev->le_scan_type == LE_SCAN_ACTIVE) { clear_pending_adv_report(hdev); - hci_discovery_set_state(hdev, DISCOVERY_FINDING); + hci_discovery_set_state(hdev, DISCOVERY_FINDING); + } break; case LE_SCAN_DISABLE: diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index cd2ed16da8a4..a31d39a821f4 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -2976,6 +2976,27 @@ static int hci_passive_scan_sync(struct hci_dev *hdev) */ filter_policy = hci_update_accept_list_sync(hdev); + /* If suspended and filter_policy set to 0x00 (no acceptlist) then + * passive scanning cannot be started since that would require the host + * to be woken up to process the reports. + */ + if (hdev->suspended && !filter_policy) { + /* Check if accept list is empty then there is no need to scan + * while suspended. + */ + if (list_empty(&hdev->le_accept_list)) + return 0; + + /* If there are devices is the accept_list that means some + * devices could not be programmed which in non-suspended case + * means filter_policy needs to be set to 0x00 so the host needs + * to filter, but since this is treating suspended case we + * can ignore device needing host to filter to allow devices in + * the acceptlist to be able to wakeup the system. + */ + filter_policy = 0x01; + } + /* When the controller is using random resolvable addresses and * with that having LE privacy enabled, then controllers with * Extended Scanner Filter Policies support can now enable support diff --git a/net/core/dev.c b/net/core/dev.c index 6ea1d20676fb..751d9b70e6ad 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5150,6 +5150,7 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb) bpf_net_ctx_clear(bpf_net_ctx); return XDP_DROP; } + bpf_net_ctx_clear(bpf_net_ctx); } return XDP_PASS; out_redir: diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 87e67194f240..73fd7f543fd0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3288,7 +3288,7 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, if (ifm->ifi_index > 0) dev = __dev_get_by_index(tgt_net, ifm->ifi_index); else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) - dev = rtnl_dev_get(net, tb); + dev = rtnl_dev_get(tgt_net, tb); else if (tb[IFLA_GROUP]) err = rtnl_group_dellink(tgt_net, nla_get_u32(tb[IFLA_GROUP])); else diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 983fee76f5cf..8ca13208d240 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1331,13 +1331,13 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]); const struct ethtool_ops *ops = dev->ethtool_ops; u32 dev_indir_size = 0, dev_key_size = 0, i; + u32 user_indir_len = 0, indir_bytes = 0; struct ethtool_rxfh_param rxfh_dev = {}; struct ethtool_rxfh_context *ctx = NULL; struct netlink_ext_ack *extack = NULL; struct ethtool_rxnfc rx_rings; struct ethtool_rxfh rxfh; bool locked = false; /* dev->ethtool->rss_lock taken */ - u32 indir_bytes = 0; bool create = false; u8 *rss_config; int ret; @@ -1382,10 +1382,9 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, rxfh.input_xfrm == RXH_XFRM_NO_CHANGE)) return -EINVAL; - if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) - indir_bytes = dev_indir_size * sizeof(rxfh_dev.indir[0]); + indir_bytes = dev_indir_size * sizeof(rxfh_dev.indir[0]); - rss_config = kzalloc(indir_bytes + rxfh.key_size, GFP_USER); + rss_config = kzalloc(indir_bytes + dev_key_size, GFP_USER); if (!rss_config) return -ENOMEM; @@ -1400,6 +1399,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, */ if (rxfh.indir_size && rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) { + user_indir_len = indir_bytes; rxfh_dev.indir = (u32 *)rss_config; rxfh_dev.indir_size = dev_indir_size; ret = ethtool_copy_validate_indir(rxfh_dev.indir, @@ -1426,7 +1426,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, rxfh_dev.key_size = dev_key_size; rxfh_dev.key = rss_config + indir_bytes; if (copy_from_user(rxfh_dev.key, - useraddr + rss_cfg_offset + indir_bytes, + useraddr + rss_cfg_offset + user_indir_len, rxfh.key_size)) { ret = -EFAULT; goto out; @@ -1474,16 +1474,21 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, rxfh_dev.input_xfrm = rxfh.input_xfrm; if (rxfh.rss_context && ops->create_rxfh_context) { - if (create) + if (create) { ret = ops->create_rxfh_context(dev, ctx, &rxfh_dev, extack); - else if (rxfh_dev.rss_delete) + /* Make sure driver populates defaults */ + WARN_ON_ONCE(!ret && !rxfh_dev.key && + !memchr_inv(ethtool_rxfh_context_key(ctx), + 0, ctx->key_size)); + } else if (rxfh_dev.rss_delete) { ret = ops->remove_rxfh_context(dev, ctx, rxfh.rss_context, extack); - else + } else { ret = ops->modify_rxfh_context(dev, ctx, &rxfh_dev, extack); + } } else { ret = ops->set_rxfh(dev, &rxfh_dev, extack); } @@ -1522,6 +1527,22 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, kfree(ctx); goto out; } + + /* Fetch the defaults for the old API, in the new API drivers + * should write defaults into ctx themselves. + */ + rxfh_dev.indir = (u32 *)rss_config; + rxfh_dev.indir_size = dev_indir_size; + + rxfh_dev.key = rss_config + indir_bytes; + rxfh_dev.key_size = dev_key_size; + + ret = ops->get_rxfh(dev, &rxfh_dev); + if (WARN_ON(ret)) { + xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context); + kfree(ctx); + goto out; + } } if (rxfh_dev.rss_delete) { WARN_ON(xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context) != ctx); @@ -1530,12 +1551,14 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, if (rxfh_dev.indir) { for (i = 0; i < dev_indir_size; i++) ethtool_rxfh_context_indir(ctx)[i] = rxfh_dev.indir[i]; - ctx->indir_configured = 1; + ctx->indir_configured = + rxfh.indir_size && + rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE; } if (rxfh_dev.key) { memcpy(ethtool_rxfh_context_key(ctx), rxfh_dev.key, dev_key_size); - ctx->key_configured = 1; + ctx->key_configured = !!rxfh.key_size; } if (rxfh_dev.hfunc != ETH_RSS_HASH_NO_CHANGE) ctx->hfunc = rxfh_dev.hfunc; diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index 71679137eff2..5c4c4505ab9a 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -111,7 +111,8 @@ rss_reply_size(const struct ethnl_req_info *req_base, const struct rss_reply_data *data = RSS_REPDATA(reply_base); int len; - len = nla_total_size(sizeof(u32)) + /* _RSS_HFUNC */ + len = nla_total_size(sizeof(u32)) + /* _RSS_CONTEXT */ + nla_total_size(sizeof(u32)) + /* _RSS_HFUNC */ nla_total_size(sizeof(u32)) + /* _RSS_INPUT_XFRM */ nla_total_size(sizeof(u32) * data->indir_size) + /* _RSS_INDIR */ nla_total_size(data->hkey_size); /* _RSS_HKEY */ @@ -124,6 +125,11 @@ rss_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct rss_reply_data *data = RSS_REPDATA(reply_base); + struct rss_req_info *request = RSS_REQINFO(req_base); + + if (request->rss_context && + nla_put_u32(skb, ETHTOOL_A_RSS_CONTEXT, request->rss_context)) + return -EMSGSIZE; if ((data->hfunc && nla_put_u32(skb, ETHTOOL_A_RSS_HFUNC, data->hfunc)) || diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 4d42d0756fd7..a5db7c67d61b 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -145,25 +145,27 @@ static struct pernet_operations iptable_nat_net_ops = { static int __init iptable_nat_init(void) { - int ret = xt_register_template(&nf_nat_ipv4_table, - iptable_nat_table_init); + int ret; + /* net->gen->ptr[iptable_nat_net_id] must be allocated + * before calling iptable_nat_table_init(). + */ + ret = register_pernet_subsys(&iptable_nat_net_ops); if (ret < 0) return ret; - ret = register_pernet_subsys(&iptable_nat_net_ops); - if (ret < 0) { - xt_unregister_template(&nf_nat_ipv4_table); - return ret; - } + ret = xt_register_template(&nf_nat_ipv4_table, + iptable_nat_table_init); + if (ret < 0) + unregister_pernet_subsys(&iptable_nat_net_ops); return ret; } static void __exit iptable_nat_exit(void) { - unregister_pernet_subsys(&iptable_nat_net_ops); xt_unregister_template(&nf_nat_ipv4_table); + unregister_pernet_subsys(&iptable_nat_net_ops); } module_init(iptable_nat_init); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 454362e359da..e2b9583ed96a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -754,8 +754,7 @@ void tcp_rcv_space_adjust(struct sock *sk) * <prev RTT . ><current RTT .. ><next RTT .... > */ - if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) && - !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf)) { u64 rcvwin, grow; int rcvbuf; @@ -771,12 +770,22 @@ void tcp_rcv_space_adjust(struct sock *sk) rcvbuf = min_t(u64, tcp_space_from_win(sk, rcvwin), READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])); - if (rcvbuf > sk->sk_rcvbuf) { - WRITE_ONCE(sk->sk_rcvbuf, rcvbuf); + if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { + if (rcvbuf > sk->sk_rcvbuf) { + WRITE_ONCE(sk->sk_rcvbuf, rcvbuf); - /* Make the window clamp follow along. */ - WRITE_ONCE(tp->window_clamp, - tcp_win_from_space(sk, rcvbuf)); + /* Make the window clamp follow along. */ + WRITE_ONCE(tp->window_clamp, + tcp_win_from_space(sk, rcvbuf)); + } + } else { + /* Make the window clamp follow along while being bounded + * by SO_RCVBUF. + */ + int clamp = tcp_win_from_space(sk, min(rcvbuf, sk->sk_rcvbuf)); + + if (clamp > tp->window_clamp) + WRITE_ONCE(tp->window_clamp, clamp); } } tp->rcvq_space.space = copied; diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 4b791e74529e..e4ad3311e148 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -140,6 +140,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, if (thlen < sizeof(*th)) goto out; + if (unlikely(skb_checksum_start(skb) != skb_transport_header(skb))) + goto out; + if (!pskb_may_pull(skb, thlen)) goto out; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index aa2e0a28ca61..bc8a9da750fe 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -278,6 +278,10 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, if (gso_skb->len <= sizeof(*uh) + mss) return ERR_PTR(-EINVAL); + if (unlikely(skb_checksum_start(gso_skb) != + skb_transport_header(gso_skb))) + return ERR_PTR(-EINVAL); + if (skb_gso_ok(gso_skb, features | NETIF_F_GSO_ROBUST)) { /* Packet is from an untrusted source, reset gso_segs. */ skb_shinfo(gso_skb)->gso_segs = DIV_ROUND_UP(gso_skb->len - sizeof(*uh), diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 70a0b2ad6bd7..b8eec1b6cc2c 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -227,6 +227,7 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev, return NULL; memset(ndopts, 0, sizeof(*ndopts)); while (opt_len) { + bool unknown = false; int l; if (opt_len < sizeof(struct nd_opt_hdr)) return NULL; @@ -262,22 +263,23 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev, break; #endif default: - if (ndisc_is_useropt(dev, nd_opt)) { - ndopts->nd_useropts_end = nd_opt; - if (!ndopts->nd_useropts) - ndopts->nd_useropts = nd_opt; - } else { - /* - * Unknown options must be silently ignored, - * to accommodate future extension to the - * protocol. - */ - ND_PRINTK(2, notice, - "%s: ignored unsupported option; type=%d, len=%d\n", - __func__, - nd_opt->nd_opt_type, - nd_opt->nd_opt_len); - } + unknown = true; + } + if (ndisc_is_useropt(dev, nd_opt)) { + ndopts->nd_useropts_end = nd_opt; + if (!ndopts->nd_useropts) + ndopts->nd_useropts = nd_opt; + } else if (unknown) { + /* + * Unknown options must be silently ignored, + * to accommodate future extension to the + * protocol. + */ + ND_PRINTK(2, notice, + "%s: ignored unsupported option; type=%d, len=%d\n", + __func__, + nd_opt->nd_opt_type, + nd_opt->nd_opt_len); } next_opt: opt_len -= l; diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 52cf104e3478..e119d4f090cc 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -147,23 +147,27 @@ static struct pernet_operations ip6table_nat_net_ops = { static int __init ip6table_nat_init(void) { - int ret = xt_register_template(&nf_nat_ipv6_table, - ip6table_nat_table_init); + int ret; + /* net->gen->ptr[ip6table_nat_net_id] must be allocated + * before calling ip6t_nat_register_lookups(). + */ + ret = register_pernet_subsys(&ip6table_nat_net_ops); if (ret < 0) return ret; - ret = register_pernet_subsys(&ip6table_nat_net_ops); + ret = xt_register_template(&nf_nat_ipv6_table, + ip6table_nat_table_init); if (ret) - xt_unregister_template(&nf_nat_ipv6_table); + unregister_pernet_subsys(&ip6table_nat_net_ops); return ret; } static void __exit ip6table_nat_exit(void) { - unregister_pernet_subsys(&ip6table_nat_net_ops); xt_unregister_template(&nf_nat_ipv6_table); + unregister_pernet_subsys(&ip6table_nat_net_ops); } module_init(ip6table_nat_init); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index c3b0b610b0aa..c00323fa9eb6 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -335,8 +335,8 @@ static void iucv_sever_path(struct sock *sk, int with_user_data) struct iucv_sock *iucv = iucv_sk(sk); struct iucv_path *path = iucv->path; - if (iucv->path) { - iucv->path = NULL; + /* Whoever resets the path pointer, must sever and free it. */ + if (xchg(&iucv->path, NULL)) { if (with_user_data) { low_nmcpy(user_data, iucv->src_name); high_nmcpy(user_data, iucv->dst_name); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 85cb71de370f..b02b84ce2130 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -114,7 +114,7 @@ static int ieee80211_set_mon_options(struct ieee80211_sub_if_data *sdata, /* apply all changes now - no failures allowed */ - if (monitor_sdata) + if (monitor_sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) ieee80211_set_mu_mimo_follow(monitor_sdata, params); if (params->flags) { @@ -3053,6 +3053,9 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { + if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) + return -EOPNOTSUPP; + sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); if (!sdata) @@ -3115,7 +3118,7 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, if (has_monitor) { sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); - if (sdata) { + if (sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) { sdata->deflink.user_power_level = local->user_power_level; if (txp_type != sdata->vif.bss_conf.txpower_type) update_txp_type = true; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 72a9ba8bc5fd..edba4a31844f 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1768,7 +1768,7 @@ static bool __ieee80211_tx(struct ieee80211_local *local, break; } sdata = rcu_dereference(local->monitor_sdata); - if (sdata) { + if (sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) { vif = &sdata->vif; info->hw_queue = vif->hw_queue[skb_get_queue_mapping(skb)]; @@ -3957,7 +3957,8 @@ begin: break; } tx.sdata = rcu_dereference(local->monitor_sdata); - if (tx.sdata) { + if (tx.sdata && + ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) { vif = &tx.sdata->vif; info->hw_queue = vif->hw_queue[skb_get_queue_mapping(skb)]; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index ced19ce7c51a..c7ad9bc5973a 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -776,7 +776,7 @@ static void __iterate_interfaces(struct ieee80211_local *local, sdata = rcu_dereference_check(local->monitor_sdata, lockdep_is_held(&local->iflist_mtx) || lockdep_is_held(&local->hw.wiphy->mtx)); - if (sdata && + if (sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF) && (iter_flags & IEEE80211_IFACE_ITER_RESUME_ALL || !active_only || sdata->flags & IEEE80211_SDATA_IN_DRIVER)) iterator(data, sdata->vif.addr, &sdata->vif); diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index c30405e76833..7884217f33eb 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -19,7 +19,9 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS), SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN), SNMP_MIB_ITEM("MPJoinSynRx", MPTCP_MIB_JOINSYNRX), + SNMP_MIB_ITEM("MPJoinSynBackupRx", MPTCP_MIB_JOINSYNBACKUPRX), SNMP_MIB_ITEM("MPJoinSynAckRx", MPTCP_MIB_JOINSYNACKRX), + SNMP_MIB_ITEM("MPJoinSynAckBackupRx", MPTCP_MIB_JOINSYNACKBACKUPRX), SNMP_MIB_ITEM("MPJoinSynAckHMacFailure", MPTCP_MIB_JOINSYNACKMAC), SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX), SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC), diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index 2704afd0dfe4..66aa67f49d03 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -14,7 +14,9 @@ enum linux_mptcp_mib_field { MPTCP_MIB_RETRANSSEGS, /* Segments retransmitted at the MPTCP-level */ MPTCP_MIB_JOINNOTOKEN, /* Received MP_JOIN but the token was not found */ MPTCP_MIB_JOINSYNRX, /* Received a SYN + MP_JOIN */ + MPTCP_MIB_JOINSYNBACKUPRX, /* Received a SYN + MP_JOIN + backup flag */ MPTCP_MIB_JOINSYNACKRX, /* Received a SYN/ACK + MP_JOIN */ + MPTCP_MIB_JOINSYNACKBACKUPRX, /* Received a SYN/ACK + MP_JOIN + backup flag */ MPTCP_MIB_JOINSYNACKMAC, /* HMAC was wrong on SYN/ACK + MP_JOIN */ MPTCP_MIB_JOINACKRX, /* Received an ACK + MP_JOIN */ MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */ diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 8e8dcfbc2993..8a68382a4fe9 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -909,7 +909,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, return true; } else if (subflow_req->mp_join) { opts->suboptions = OPTION_MPTCP_MPJ_SYNACK; - opts->backup = subflow_req->backup; + opts->backup = subflow_req->request_bkup; opts->join_id = subflow_req->local_id; opts->thmac = subflow_req->thmac; opts->nonce = subflow_req->local_nonce; diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 55406720c607..23bb89c94e90 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -426,6 +426,18 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) return mptcp_pm_nl_get_local_id(msk, &skc_local); } +bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc) +{ + struct mptcp_addr_info skc_local; + + mptcp_local_address((struct sock_common *)skc, &skc_local); + + if (mptcp_pm_is_userspace(msk)) + return mptcp_userspace_pm_is_backup(msk, &skc_local); + + return mptcp_pm_nl_is_backup(msk, &skc_local); +} + int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, u8 *flags, int *ifindex) { diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index ea9e5817b9e9..37954a0b087d 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -471,7 +471,6 @@ static void __mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_con slow = lock_sock_fast(ssk); if (prio) { subflow->send_mp_prio = 1; - subflow->backup = backup; subflow->request_bkup = backup; } @@ -1102,6 +1101,24 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc return ret; } +bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc) +{ + struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); + struct mptcp_pm_addr_entry *entry; + bool backup = false; + + rcu_read_lock(); + list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { + if (mptcp_addresses_equal(&entry->addr, skc, entry->addr.port)) { + backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); + break; + } + } + rcu_read_unlock(); + + return backup; +} + #define MPTCP_PM_CMD_GRP_OFFSET 0 #define MPTCP_PM_EV_GRP_OFFSET 1 @@ -1401,6 +1418,7 @@ static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk, ret = remove_anno_list_by_saddr(msk, addr); if (ret || force) { spin_lock_bh(&msk->pm.lock); + msk->pm.add_addr_signaled -= ret; mptcp_pm_remove_addr(msk, &list); spin_unlock_bh(&msk->pm.lock); } @@ -1534,16 +1552,25 @@ void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list) { struct mptcp_rm_list alist = { .nr = 0 }; struct mptcp_pm_addr_entry *entry; + int anno_nr = 0; list_for_each_entry(entry, rm_list, list) { - if ((remove_anno_list_by_saddr(msk, &entry->addr) || - lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) && - alist.nr < MPTCP_RM_IDS_MAX) - alist.ids[alist.nr++] = entry->addr.id; + if (alist.nr >= MPTCP_RM_IDS_MAX) + break; + + /* only delete if either announced or matching a subflow */ + if (remove_anno_list_by_saddr(msk, &entry->addr)) + anno_nr++; + else if (!lookup_subflow_by_saddr(&msk->conn_list, + &entry->addr)) + continue; + + alist.ids[alist.nr++] = entry->addr.id; } if (alist.nr) { spin_lock_bh(&msk->pm.lock); + msk->pm.add_addr_signaled -= anno_nr; mptcp_pm_remove_addr(msk, &alist); spin_unlock_bh(&msk->pm.lock); } @@ -1556,17 +1583,18 @@ static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, struct mptcp_pm_addr_entry *entry; list_for_each_entry(entry, rm_list, list) { - if (lookup_subflow_by_saddr(&msk->conn_list, &entry->addr) && - slist.nr < MPTCP_RM_IDS_MAX) + if (slist.nr < MPTCP_RM_IDS_MAX && + lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) slist.ids[slist.nr++] = entry->addr.id; - if (remove_anno_list_by_saddr(msk, &entry->addr) && - alist.nr < MPTCP_RM_IDS_MAX) + if (alist.nr < MPTCP_RM_IDS_MAX && + remove_anno_list_by_saddr(msk, &entry->addr)) alist.ids[alist.nr++] = entry->addr.id; } if (alist.nr) { spin_lock_bh(&msk->pm.lock); + msk->pm.add_addr_signaled -= alist.nr; mptcp_pm_remove_addr(msk, &alist); spin_unlock_bh(&msk->pm.lock); } diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index f0a4590506c6..8eaa9fbe3e34 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -165,6 +165,24 @@ int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry, true); } +bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk, + struct mptcp_addr_info *skc) +{ + struct mptcp_pm_addr_entry *entry; + bool backup = false; + + spin_lock_bh(&msk->pm.lock); + list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { + if (mptcp_addresses_equal(&entry->addr, skc, false)) { + backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); + break; + } + } + spin_unlock_bh(&msk->pm.lock); + + return backup; +} + int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a26c2c840fd9..0d536b183a6c 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -350,8 +350,10 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, skb_orphan(skb); /* try to fetch required memory from subflow */ - if (!mptcp_rmem_schedule(sk, ssk, skb->truesize)) + if (!mptcp_rmem_schedule(sk, ssk, skb->truesize)) { + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED); goto drop; + } has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp; @@ -844,10 +846,8 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) sk_rbuf = ssk_rbuf; /* over limit? can't append more skbs to msk, Also, no need to wake-up*/ - if (__mptcp_rmem(sk) > sk_rbuf) { - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED); + if (__mptcp_rmem(sk) > sk_rbuf) return; - } /* Wake-up the reader only for in-sequence data */ mptcp_data_lock(sk); @@ -1422,13 +1422,15 @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) } mptcp_for_each_subflow(msk, subflow) { + bool backup = subflow->backup || subflow->request_bkup; + trace_mptcp_subflow_get_send(subflow); ssk = mptcp_subflow_tcp_sock(subflow); if (!mptcp_subflow_active(subflow)) continue; tout = max(tout, mptcp_timeout_from_subflow(subflow)); - nr_active += !subflow->backup; + nr_active += !backup; pace = subflow->avg_pacing_rate; if (unlikely(!pace)) { /* init pacing rate from socket */ @@ -1439,9 +1441,9 @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) } linger_time = div_u64((u64)READ_ONCE(ssk->sk_wmem_queued) << 32, pace); - if (linger_time < send_info[subflow->backup].linger_time) { - send_info[subflow->backup].ssk = ssk; - send_info[subflow->backup].linger_time = linger_time; + if (linger_time < send_info[backup].linger_time) { + send_info[backup].ssk = ssk; + send_info[backup].linger_time = linger_time; } } __mptcp_set_timeout(sk, tout); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index b11a4e50d52b..60c6b073d65f 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -448,6 +448,7 @@ struct mptcp_subflow_request_sock { u16 mp_capable : 1, mp_join : 1, backup : 1, + request_bkup : 1, csum_reqd : 1, allow_join_id0 : 1; u8 local_id; @@ -1108,6 +1109,9 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); +bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc); +bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc); +bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc); int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb); int mptcp_pm_nl_dump_addr(struct sk_buff *msg, struct netlink_callback *cb); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 39e2cbdf3801..a21c712350c3 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -100,6 +100,7 @@ static struct mptcp_sock *subflow_token_join_request(struct request_sock *req) return NULL; } subflow_req->local_id = local_id; + subflow_req->request_bkup = mptcp_pm_is_backup(msk, (struct sock_common *)req); return msk; } @@ -168,6 +169,9 @@ static int subflow_check_req(struct request_sock *req, return 0; } else if (opt_mp_join) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX); + + if (mp_opt.backup) + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNBACKUPRX); } if (opt_mp_capable && listener->request_mptcp) { @@ -577,6 +581,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->mp_join = 1; MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX); + if (subflow->backup) + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKBACKUPRX); + if (subflow_use_different_dport(msk, sk)) { pr_debug("synack inet_dport=%d %d", ntohs(inet_sk(sk)->inet_dport), @@ -614,6 +621,8 @@ static int subflow_chk_local_id(struct sock *sk) return err; subflow_set_local_id(subflow, err); + subflow->request_bkup = mptcp_pm_is_backup(msk, (struct sock_common *)sk); + return 0; } @@ -1221,14 +1230,22 @@ static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb, { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); bool fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; - u32 incr; + struct tcp_sock *tp = tcp_sk(ssk); + u32 offset, incr, avail_len; - incr = limit >= skb->len ? skb->len + fin : limit; + offset = tp->copied_seq - TCP_SKB_CB(skb)->seq; + if (WARN_ON_ONCE(offset > skb->len)) + goto out; - pr_debug("discarding=%d len=%d seq=%d", incr, skb->len, - subflow->map_subflow_seq); + avail_len = skb->len - offset; + incr = limit >= avail_len ? avail_len + fin : limit; + + pr_debug("discarding=%d len=%d offset=%d seq=%d", incr, skb->len, + offset, subflow->map_subflow_seq); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DUPDATA); tcp_sk(ssk)->copied_seq += incr; + +out: if (!before(tcp_sk(ssk)->copied_seq, TCP_SKB_CB(skb)->end_seq)) sk_eat_skb(ssk, skb); if (mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) @@ -2005,6 +2022,7 @@ static void subflow_ulp_clone(const struct request_sock *req, new_ctx->fully_established = 1; new_ctx->remote_key_valid = 1; new_ctx->backup = subflow_req->backup; + new_ctx->request_bkup = subflow_req->request_bkup; WRITE_ONCE(new_ctx->remote_id, subflow_req->remote_id); new_ctx->token = subflow_req->token; new_ctx->thmac = subflow_req->thmac; diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 113b907da0f7..3ba8e7e739b5 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -44,6 +44,8 @@ static DEFINE_MUTEX(zones_mutex); struct zones_ht_key { struct net *net; u16 zone; + /* Note : pad[] must be the last field. */ + u8 pad[]; }; struct tcf_ct_flow_table { @@ -60,7 +62,7 @@ struct tcf_ct_flow_table { static const struct rhashtable_params zones_params = { .head_offset = offsetof(struct tcf_ct_flow_table, node), .key_offset = offsetof(struct tcf_ct_flow_table, key), - .key_len = sizeof_field(struct tcf_ct_flow_table, key), + .key_len = offsetof(struct zones_ht_key, pad), .automatic_shrinking = true, }; diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 73a875573e7a..8e3093938cd2 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -3319,10 +3319,8 @@ int smc_create_clcsk(struct net *net, struct sock *sk, int family) rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP, &smc->clcsock); - if (rc) { - sk_common_release(sk); + if (rc) return rc; - } /* smc_clcsock_release() does not wait smc->clcsock->sk's * destruction; its sk_state might not be TCP_CLOSE after @@ -3368,6 +3366,9 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol, smc->clcsock = clcsock; else rc = smc_create_clcsk(net, sk, family); + + if (rc) + sk_common_release(sk); out: return rc; } diff --git a/net/wireless/scan.c b/net/wireless/scan.c index d99319d82205..64eeed82d43d 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -3178,8 +3178,7 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, struct ieee80211_mgmt *mgmt, size_t len, gfp_t gfp) { - size_t min_hdr_len = offsetof(struct ieee80211_mgmt, - u.probe_resp.variable); + size_t min_hdr_len; struct ieee80211_ext *ext = NULL; enum cfg80211_bss_frame_type ftype; u16 beacon_interval; @@ -3202,10 +3201,16 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, if (ieee80211_is_s1g_beacon(mgmt->frame_control)) { ext = (void *) mgmt; - min_hdr_len = offsetof(struct ieee80211_ext, u.s1g_beacon); if (ieee80211_is_s1g_short_beacon(mgmt->frame_control)) min_hdr_len = offsetof(struct ieee80211_ext, u.s1g_short_beacon.variable); + else + min_hdr_len = offsetof(struct ieee80211_ext, + u.s1g_beacon.variable); + } else { + /* same for beacons */ + min_hdr_len = offsetof(struct ieee80211_mgmt, + u.probe_resp.variable); } if (WARN_ON(len < min_hdr_len)) diff --git a/net/wireless/sme.c b/net/wireless/sme.c index e419aa8c4a5a..d9d7bf8bb5c1 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -1045,6 +1045,7 @@ void cfg80211_connect_done(struct net_device *dev, cfg80211_hold_bss( bss_from_pub(params->links[link].bss)); ev->cr.links[link].bss = params->links[link].bss; + ev->cr.links[link].status = params->links[link].status; if (params->links[link].addr) { ev->cr.links[link].addr = next; diff --git a/scripts/syscall.tbl b/scripts/syscall.tbl index 591d85e8ca7e..4586a18dfe9b 100644 --- a/scripts/syscall.tbl +++ b/scripts/syscall.tbl @@ -98,9 +98,9 @@ 77 common tee sys_tee 78 common readlinkat sys_readlinkat 79 stat64 fstatat64 sys_fstatat64 -79 newstat fstatat sys_newfstatat +79 64 newfstatat sys_newfstatat 80 stat64 fstat64 sys_fstat64 -80 newstat fstat sys_newfstat +80 64 newfstat sys_newfstat 81 common sync sys_sync 82 common fsync sys_fsync 83 common fdatasync sys_fdatasync @@ -402,4 +402,3 @@ 460 common lsm_set_self_attr sys_lsm_set_self_attr 461 common lsm_list_modules sys_lsm_list_modules 462 common mseal sys_mseal -467 common uretprobe sys_uretprobe diff --git a/sound/core/seq/seq_ports.h b/sound/core/seq/seq_ports.h index b111382f697a..9e36738c0dd0 100644 --- a/sound/core/seq/seq_ports.h +++ b/sound/core/seq/seq_ports.h @@ -7,6 +7,7 @@ #define __SND_SEQ_PORTS_H #include <sound/seq_kernel.h> +#include <sound/ump_convert.h> #include "seq_lock.h" /* list of 'exported' ports */ @@ -42,17 +43,6 @@ struct snd_seq_port_subs_info { int (*close)(void *private_data, struct snd_seq_port_subscribe *info); }; -/* context for converting from legacy control event to UMP packet */ -struct snd_seq_ump_midi2_bank { - bool rpn_set; - bool nrpn_set; - bool bank_set; - unsigned char cc_rpn_msb, cc_rpn_lsb; - unsigned char cc_nrpn_msb, cc_nrpn_lsb; - unsigned char cc_data_msb, cc_data_lsb; - unsigned char cc_bank_msb, cc_bank_lsb; -}; - struct snd_seq_client_port { struct snd_seq_addr addr; /* client/port number */ @@ -88,7 +78,7 @@ struct snd_seq_client_port { unsigned char ump_group; #if IS_ENABLED(CONFIG_SND_SEQ_UMP) - struct snd_seq_ump_midi2_bank midi2_bank[16]; /* per channel */ + struct ump_cvt_to_ump_bank midi2_bank[16]; /* per channel */ #endif }; diff --git a/sound/core/seq/seq_ump_convert.c b/sound/core/seq/seq_ump_convert.c index e90b27a135e6..4dd540cbb1cb 100644 --- a/sound/core/seq/seq_ump_convert.c +++ b/sound/core/seq/seq_ump_convert.c @@ -368,7 +368,7 @@ static int cvt_ump_midi1_to_midi2(struct snd_seq_client *dest, struct snd_seq_ump_event ev_cvt; const union snd_ump_midi1_msg *midi1 = (const union snd_ump_midi1_msg *)event->ump; union snd_ump_midi2_msg *midi2 = (union snd_ump_midi2_msg *)ev_cvt.ump; - struct snd_seq_ump_midi2_bank *cc; + struct ump_cvt_to_ump_bank *cc; ev_cvt = *event; memset(&ev_cvt.ump, 0, sizeof(ev_cvt.ump)); @@ -789,28 +789,45 @@ static int paf_ev_to_ump_midi2(const struct snd_seq_event *event, return 1; } +static void reset_rpn(struct ump_cvt_to_ump_bank *cc) +{ + cc->rpn_set = 0; + cc->nrpn_set = 0; + cc->cc_rpn_msb = cc->cc_rpn_lsb = 0; + cc->cc_data_msb = cc->cc_data_lsb = 0; + cc->cc_data_msb_set = cc->cc_data_lsb_set = 0; +} + /* set up the MIDI2 RPN/NRPN packet data from the parsed info */ -static void fill_rpn(struct snd_seq_ump_midi2_bank *cc, - union snd_ump_midi2_msg *data, - unsigned char channel) +static int fill_rpn(struct ump_cvt_to_ump_bank *cc, + union snd_ump_midi2_msg *data, + unsigned char channel, + bool flush) { + if (!(cc->cc_data_lsb_set || cc->cc_data_msb_set)) + return 0; // skip + /* when not flushing, wait for complete data set */ + if (!flush && (!cc->cc_data_lsb_set || !cc->cc_data_msb_set)) + return 0; // skip + if (cc->rpn_set) { data->rpn.status = UMP_MSG_STATUS_RPN; data->rpn.bank = cc->cc_rpn_msb; data->rpn.index = cc->cc_rpn_lsb; - cc->rpn_set = 0; - cc->cc_rpn_msb = cc->cc_rpn_lsb = 0; - } else { + } else if (cc->nrpn_set) { data->rpn.status = UMP_MSG_STATUS_NRPN; data->rpn.bank = cc->cc_nrpn_msb; data->rpn.index = cc->cc_nrpn_lsb; - cc->nrpn_set = 0; - cc->cc_nrpn_msb = cc->cc_nrpn_lsb = 0; + } else { + return 0; // skip } + data->rpn.data = upscale_14_to_32bit((cc->cc_data_msb << 7) | cc->cc_data_lsb); data->rpn.channel = channel; - cc->cc_data_msb = cc->cc_data_lsb = 0; + + reset_rpn(cc); + return 1; } /* convert CC event to MIDI 2.0 UMP */ @@ -822,29 +839,39 @@ static int cc_ev_to_ump_midi2(const struct snd_seq_event *event, unsigned char channel = event->data.control.channel & 0x0f; unsigned char index = event->data.control.param & 0x7f; unsigned char val = event->data.control.value & 0x7f; - struct snd_seq_ump_midi2_bank *cc = &dest_port->midi2_bank[channel]; + struct ump_cvt_to_ump_bank *cc = &dest_port->midi2_bank[channel]; + int ret; /* process special CC's (bank/rpn/nrpn) */ switch (index) { case UMP_CC_RPN_MSB: + ret = fill_rpn(cc, data, channel, true); cc->rpn_set = 1; cc->cc_rpn_msb = val; - return 0; // skip + if (cc->cc_rpn_msb == 0x7f && cc->cc_rpn_lsb == 0x7f) + reset_rpn(cc); + return ret; case UMP_CC_RPN_LSB: + ret = fill_rpn(cc, data, channel, true); cc->rpn_set = 1; cc->cc_rpn_lsb = val; - return 0; // skip + if (cc->cc_rpn_msb == 0x7f && cc->cc_rpn_lsb == 0x7f) + reset_rpn(cc); + return ret; case UMP_CC_NRPN_MSB: + ret = fill_rpn(cc, data, channel, true); cc->nrpn_set = 1; cc->cc_nrpn_msb = val; - return 0; // skip + return ret; case UMP_CC_NRPN_LSB: + ret = fill_rpn(cc, data, channel, true); cc->nrpn_set = 1; cc->cc_nrpn_lsb = val; - return 0; // skip + return ret; case UMP_CC_DATA: + cc->cc_data_msb_set = 1; cc->cc_data_msb = val; - return 0; // skip + return fill_rpn(cc, data, channel, false); case UMP_CC_BANK_SELECT: cc->bank_set = 1; cc->cc_bank_msb = val; @@ -854,11 +881,9 @@ static int cc_ev_to_ump_midi2(const struct snd_seq_event *event, cc->cc_bank_lsb = val; return 0; // skip case UMP_CC_DATA_LSB: + cc->cc_data_lsb_set = 1; cc->cc_data_lsb = val; - if (!(cc->rpn_set || cc->nrpn_set)) - return 0; // skip - fill_rpn(cc, data, channel); - return 1; + return fill_rpn(cc, data, channel, false); } data->cc.status = status; @@ -887,7 +912,7 @@ static int pgm_ev_to_ump_midi2(const struct snd_seq_event *event, unsigned char status) { unsigned char channel = event->data.control.channel & 0x0f; - struct snd_seq_ump_midi2_bank *cc = &dest_port->midi2_bank[channel]; + struct ump_cvt_to_ump_bank *cc = &dest_port->midi2_bank[channel]; data->pg.status = status; data->pg.channel = channel; @@ -924,8 +949,9 @@ static int ctrl14_ev_to_ump_midi2(const struct snd_seq_event *event, { unsigned char channel = event->data.control.channel & 0x0f; unsigned char index = event->data.control.param & 0x7f; - struct snd_seq_ump_midi2_bank *cc = &dest_port->midi2_bank[channel]; + struct ump_cvt_to_ump_bank *cc = &dest_port->midi2_bank[channel]; unsigned char msb, lsb; + int ret; msb = (event->data.control.value >> 7) & 0x7f; lsb = event->data.control.value & 0x7f; @@ -939,28 +965,27 @@ static int ctrl14_ev_to_ump_midi2(const struct snd_seq_event *event, cc->cc_bank_lsb = lsb; return 0; // skip case UMP_CC_RPN_MSB: - cc->cc_rpn_msb = msb; - fallthrough; case UMP_CC_RPN_LSB: - cc->rpn_set = 1; + ret = fill_rpn(cc, data, channel, true); + cc->cc_rpn_msb = msb; cc->cc_rpn_lsb = lsb; - return 0; // skip + cc->rpn_set = 1; + if (cc->cc_rpn_msb == 0x7f && cc->cc_rpn_lsb == 0x7f) + reset_rpn(cc); + return ret; case UMP_CC_NRPN_MSB: - cc->cc_nrpn_msb = msb; - fallthrough; case UMP_CC_NRPN_LSB: + ret = fill_rpn(cc, data, channel, true); + cc->cc_nrpn_msb = msb; cc->nrpn_set = 1; cc->cc_nrpn_lsb = lsb; - return 0; // skip + return ret; case UMP_CC_DATA: - cc->cc_data_msb = msb; - fallthrough; case UMP_CC_DATA_LSB: + cc->cc_data_msb_set = cc->cc_data_lsb_set = 1; + cc->cc_data_msb = msb; cc->cc_data_lsb = lsb; - if (!(cc->rpn_set || cc->nrpn_set)) - return 0; // skip - fill_rpn(cc, data, channel); - return 1; + return fill_rpn(cc, data, channel, false); } data->cc.status = UMP_MSG_STATUS_CC; @@ -1192,44 +1217,53 @@ static int cvt_sysex_to_ump(struct snd_seq_client *dest, { struct snd_seq_ump_event ev_cvt; unsigned char status; - u8 buf[6], *xbuf; + u8 buf[8], *xbuf; int offset = 0; int len, err; + bool finished = false; if (!snd_seq_ev_is_variable(event)) return 0; setup_ump_event(&ev_cvt, event); - for (;;) { + while (!finished) { len = snd_seq_expand_var_event_at(event, sizeof(buf), buf, offset); if (len <= 0) break; - if (WARN_ON(len > 6)) + if (WARN_ON(len > sizeof(buf))) break; - offset += len; + xbuf = buf; + status = UMP_SYSEX_STATUS_CONTINUE; + /* truncate the sysex start-marker */ if (*xbuf == UMP_MIDI1_MSG_SYSEX_START) { status = UMP_SYSEX_STATUS_START; - xbuf++; len--; - if (len > 0 && xbuf[len - 1] == UMP_MIDI1_MSG_SYSEX_END) { + offset++; + xbuf++; + } + + /* if the last of this packet or the 1st byte of the next packet + * is the end-marker, finish the transfer with this packet + */ + if (len > 0 && len < 8 && + xbuf[len - 1] == UMP_MIDI1_MSG_SYSEX_END) { + if (status == UMP_SYSEX_STATUS_START) status = UMP_SYSEX_STATUS_SINGLE; - len--; - } - } else { - if (xbuf[len - 1] == UMP_MIDI1_MSG_SYSEX_END) { + else status = UMP_SYSEX_STATUS_END; - len--; - } else { - status = UMP_SYSEX_STATUS_CONTINUE; - } + len--; + finished = true; } + + len = min(len, 6); fill_sysex7_ump(dest_port, ev_cvt.ump, status, xbuf, len); err = __snd_seq_deliver_single_event(dest, dest_port, (struct snd_seq_event *)&ev_cvt, atomic, hop); if (err < 0) return err; + offset += len; } return 0; } diff --git a/sound/core/ump_convert.c b/sound/core/ump_convert.c index f67c44c83fde..0fe13d031656 100644 --- a/sound/core/ump_convert.c +++ b/sound/core/ump_convert.c @@ -287,25 +287,42 @@ static int cvt_legacy_system_to_ump(struct ump_cvt_to_ump *cvt, return 4; } -static void fill_rpn(struct ump_cvt_to_ump_bank *cc, - union snd_ump_midi2_msg *midi2) +static void reset_rpn(struct ump_cvt_to_ump_bank *cc) { + cc->rpn_set = 0; + cc->nrpn_set = 0; + cc->cc_rpn_msb = cc->cc_rpn_lsb = 0; + cc->cc_data_msb = cc->cc_data_lsb = 0; + cc->cc_data_msb_set = cc->cc_data_lsb_set = 0; +} + +static int fill_rpn(struct ump_cvt_to_ump_bank *cc, + union snd_ump_midi2_msg *midi2, + bool flush) +{ + if (!(cc->cc_data_lsb_set || cc->cc_data_msb_set)) + return 0; // skip + /* when not flushing, wait for complete data set */ + if (!flush && (!cc->cc_data_lsb_set || !cc->cc_data_msb_set)) + return 0; // skip + if (cc->rpn_set) { midi2->rpn.status = UMP_MSG_STATUS_RPN; midi2->rpn.bank = cc->cc_rpn_msb; midi2->rpn.index = cc->cc_rpn_lsb; - cc->rpn_set = 0; - cc->cc_rpn_msb = cc->cc_rpn_lsb = 0; - } else { + } else if (cc->nrpn_set) { midi2->rpn.status = UMP_MSG_STATUS_NRPN; midi2->rpn.bank = cc->cc_nrpn_msb; midi2->rpn.index = cc->cc_nrpn_lsb; - cc->nrpn_set = 0; - cc->cc_nrpn_msb = cc->cc_nrpn_lsb = 0; + } else { + return 0; // skip } + midi2->rpn.data = upscale_14_to_32bit((cc->cc_data_msb << 7) | cc->cc_data_lsb); - cc->cc_data_msb = cc->cc_data_lsb = 0; + + reset_rpn(cc); + return 1; } /* convert to a MIDI 1.0 Channel Voice message */ @@ -318,6 +335,7 @@ static int cvt_legacy_cmd_to_ump(struct ump_cvt_to_ump *cvt, struct ump_cvt_to_ump_bank *cc; union snd_ump_midi2_msg *midi2 = (union snd_ump_midi2_msg *)data; unsigned char status, channel; + int ret; BUILD_BUG_ON(sizeof(union snd_ump_midi1_msg) != 4); BUILD_BUG_ON(sizeof(union snd_ump_midi2_msg) != 8); @@ -358,24 +376,33 @@ static int cvt_legacy_cmd_to_ump(struct ump_cvt_to_ump *cvt, case UMP_MSG_STATUS_CC: switch (buf[1]) { case UMP_CC_RPN_MSB: + ret = fill_rpn(cc, midi2, true); cc->rpn_set = 1; cc->cc_rpn_msb = buf[2]; - return 0; // skip + if (cc->cc_rpn_msb == 0x7f && cc->cc_rpn_lsb == 0x7f) + reset_rpn(cc); + return ret; case UMP_CC_RPN_LSB: + ret = fill_rpn(cc, midi2, true); cc->rpn_set = 1; cc->cc_rpn_lsb = buf[2]; - return 0; // skip + if (cc->cc_rpn_msb == 0x7f && cc->cc_rpn_lsb == 0x7f) + reset_rpn(cc); + return ret; case UMP_CC_NRPN_MSB: + ret = fill_rpn(cc, midi2, true); cc->nrpn_set = 1; cc->cc_nrpn_msb = buf[2]; - return 0; // skip + return ret; case UMP_CC_NRPN_LSB: + ret = fill_rpn(cc, midi2, true); cc->nrpn_set = 1; cc->cc_nrpn_lsb = buf[2]; - return 0; // skip + return ret; case UMP_CC_DATA: + cc->cc_data_msb_set = 1; cc->cc_data_msb = buf[2]; - return 0; // skip + return fill_rpn(cc, midi2, false); case UMP_CC_BANK_SELECT: cc->bank_set = 1; cc->cc_bank_msb = buf[2]; @@ -385,12 +412,9 @@ static int cvt_legacy_cmd_to_ump(struct ump_cvt_to_ump *cvt, cc->cc_bank_lsb = buf[2]; return 0; // skip case UMP_CC_DATA_LSB: + cc->cc_data_lsb_set = 1; cc->cc_data_lsb = buf[2]; - if (cc->rpn_set || cc->nrpn_set) - fill_rpn(cc, midi2); - else - return 0; // skip - break; + return fill_rpn(cc, midi2, false); default: midi2->cc.index = buf[1]; midi2->cc.data = upscale_7_to_32bit(buf[2]); diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c index 1a163bbcabd7..c827d7d8d800 100644 --- a/sound/firewire/amdtp-stream.c +++ b/sound/firewire/amdtp-stream.c @@ -77,6 +77,8 @@ // overrun. Actual device can skip more, then this module stops the packet streaming. #define IR_JUMBO_PAYLOAD_MAX_SKIP_CYCLES 5 +static void pcm_period_work(struct work_struct *work); + /** * amdtp_stream_init - initialize an AMDTP stream structure * @s: the AMDTP stream to initialize @@ -105,6 +107,7 @@ int amdtp_stream_init(struct amdtp_stream *s, struct fw_unit *unit, s->flags = flags; s->context = ERR_PTR(-1); mutex_init(&s->mutex); + INIT_WORK(&s->period_work, pcm_period_work); s->packet_index = 0; init_waitqueue_head(&s->ready_wait); @@ -347,6 +350,7 @@ EXPORT_SYMBOL(amdtp_stream_get_max_payload); */ void amdtp_stream_pcm_prepare(struct amdtp_stream *s) { + cancel_work_sync(&s->period_work); s->pcm_buffer_pointer = 0; s->pcm_period_pointer = 0; } @@ -611,19 +615,21 @@ static void update_pcm_pointers(struct amdtp_stream *s, // The program in user process should periodically check the status of intermediate // buffer associated to PCM substream to process PCM frames in the buffer, instead // of receiving notification of period elapsed by poll wait. - if (!pcm->runtime->no_period_wakeup) { - if (in_softirq()) { - // In software IRQ context for 1394 OHCI. - snd_pcm_period_elapsed(pcm); - } else { - // In process context of ALSA PCM application under acquired lock of - // PCM substream. - snd_pcm_period_elapsed_under_stream_lock(pcm); - } - } + if (!pcm->runtime->no_period_wakeup) + queue_work(system_highpri_wq, &s->period_work); } } +static void pcm_period_work(struct work_struct *work) +{ + struct amdtp_stream *s = container_of(work, struct amdtp_stream, + period_work); + struct snd_pcm_substream *pcm = READ_ONCE(s->pcm); + + if (pcm) + snd_pcm_period_elapsed(pcm); +} + static int queue_packet(struct amdtp_stream *s, struct fw_iso_packet *params, bool sched_irq) { @@ -1849,11 +1855,14 @@ unsigned long amdtp_domain_stream_pcm_pointer(struct amdtp_domain *d, { struct amdtp_stream *irq_target = d->irq_target; - // Process isochronous packets queued till recent isochronous cycle to handle PCM frames. if (irq_target && amdtp_stream_running(irq_target)) { - // In software IRQ context, the call causes dead-lock to disable the tasklet - // synchronously. - if (!in_softirq()) + // use wq to prevent AB/BA deadlock competition for + // substream lock: + // fw_iso_context_flush_completions() acquires + // lock by ohci_flush_iso_completions(), + // amdtp-stream process_rx_packets() attempts to + // acquire same lock by snd_pcm_elapsed() + if (current_work() != &s->period_work) fw_iso_context_flush_completions(irq_target->context); } @@ -1909,6 +1918,7 @@ static void amdtp_stream_stop(struct amdtp_stream *s) return; } + cancel_work_sync(&s->period_work); fw_iso_context_stop(s->context); fw_iso_context_destroy(s->context); s->context = ERR_PTR(-1); diff --git a/sound/firewire/amdtp-stream.h b/sound/firewire/amdtp-stream.h index a1ed2e80f91a..775db3fc4959 100644 --- a/sound/firewire/amdtp-stream.h +++ b/sound/firewire/amdtp-stream.h @@ -191,6 +191,7 @@ struct amdtp_stream { /* For a PCM substream processing. */ struct snd_pcm_substream *pcm; + struct work_struct period_work; snd_pcm_uframes_t pcm_buffer_pointer; unsigned int pcm_period_pointer; unsigned int pcm_frame_multiplier; diff --git a/sound/pci/hda/hda_controller.h b/sound/pci/hda/hda_controller.h index c2d0109866e6..68c883f202ca 100644 --- a/sound/pci/hda/hda_controller.h +++ b/sound/pci/hda/hda_controller.h @@ -28,7 +28,7 @@ #else #define AZX_DCAPS_I915_COMPONENT 0 /* NOP */ #endif -/* 14 unused */ +#define AZX_DCAPS_AMD_ALLOC_FIX (1 << 14) /* AMD allocation workaround */ #define AZX_DCAPS_CTX_WORKAROUND (1 << 15) /* X-Fi workaround */ #define AZX_DCAPS_POSFIX_LPIB (1 << 16) /* Use LPIB as default */ #define AZX_DCAPS_AMD_WORKAROUND (1 << 17) /* AMD-specific workaround */ diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index f64d9dc197a3..9cff87dfbecb 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -4955,6 +4955,69 @@ void snd_hda_gen_stream_pm(struct hda_codec *codec, hda_nid_t nid, bool on) } EXPORT_SYMBOL_GPL(snd_hda_gen_stream_pm); +/* forcibly mute the speaker output without caching; return true if updated */ +static bool force_mute_output_path(struct hda_codec *codec, hda_nid_t nid) +{ + if (!nid) + return false; + if (!nid_has_mute(codec, nid, HDA_OUTPUT)) + return false; /* no mute, skip */ + if (snd_hda_codec_amp_read(codec, nid, 0, HDA_OUTPUT, 0) & + snd_hda_codec_amp_read(codec, nid, 1, HDA_OUTPUT, 0) & + HDA_AMP_MUTE) + return false; /* both channels already muted, skip */ + + /* direct amp update without caching */ + snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_AMP_GAIN_MUTE, + AC_AMP_SET_OUTPUT | AC_AMP_SET_LEFT | + AC_AMP_SET_RIGHT | HDA_AMP_MUTE); + return true; +} + +/** + * snd_hda_gen_shutup_speakers - Forcibly mute the speaker outputs + * @codec: the HDA codec + * + * Forcibly mute the speaker outputs, to be called at suspend or shutdown. + * + * The mute state done by this function isn't cached, hence the original state + * will be restored at resume. + * + * Return true if the mute state has been changed. + */ +bool snd_hda_gen_shutup_speakers(struct hda_codec *codec) +{ + struct hda_gen_spec *spec = codec->spec; + const int *paths; + const struct nid_path *path; + int i, p, num_paths; + bool updated = false; + + /* if already powered off, do nothing */ + if (!snd_hdac_is_power_on(&codec->core)) + return false; + + if (spec->autocfg.line_out_type == AUTO_PIN_SPEAKER_OUT) { + paths = spec->out_paths; + num_paths = spec->autocfg.line_outs; + } else { + paths = spec->speaker_paths; + num_paths = spec->autocfg.speaker_outs; + } + + for (i = 0; i < num_paths; i++) { + path = snd_hda_get_path_from_idx(codec, paths[i]); + if (!path) + continue; + for (p = 0; p < path->depth; p++) + if (force_mute_output_path(codec, path->path[p])) + updated = true; + } + + return updated; +} +EXPORT_SYMBOL_GPL(snd_hda_gen_shutup_speakers); + /** * snd_hda_gen_parse_auto_config - Parse the given BIOS configuration and * set up the hda_gen_spec diff --git a/sound/pci/hda/hda_generic.h b/sound/pci/hda/hda_generic.h index 8f5ecf740c49..08544601b4ce 100644 --- a/sound/pci/hda/hda_generic.h +++ b/sound/pci/hda/hda_generic.h @@ -353,5 +353,6 @@ int snd_hda_gen_add_mute_led_cdev(struct hda_codec *codec, int snd_hda_gen_add_micmute_led_cdev(struct hda_codec *codec, int (*callback)(struct led_classdev *, enum led_brightness)); +bool snd_hda_gen_shutup_speakers(struct hda_codec *codec); #endif /* __SOUND_HDA_GENERIC_H */ diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index b33602e64d17..97d33a48ff17 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -40,6 +40,7 @@ #ifdef CONFIG_X86 /* for snoop control */ +#include <linux/dma-map-ops.h> #include <asm/set_memory.h> #include <asm/cpufeature.h> #endif @@ -306,7 +307,7 @@ enum { /* quirks for ATI HDMI with snoop off */ #define AZX_DCAPS_PRESET_ATI_HDMI_NS \ - (AZX_DCAPS_PRESET_ATI_HDMI | AZX_DCAPS_SNOOP_OFF) + (AZX_DCAPS_PRESET_ATI_HDMI | AZX_DCAPS_AMD_ALLOC_FIX) /* quirks for AMD SB */ #define AZX_DCAPS_PRESET_AMD_SB \ @@ -1702,6 +1703,13 @@ static void azx_check_snoop_available(struct azx *chip) if (chip->driver_caps & AZX_DCAPS_SNOOP_OFF) snoop = false; +#ifdef CONFIG_X86 + /* check the presence of DMA ops (i.e. IOMMU), disable snoop conditionally */ + if ((chip->driver_caps & AZX_DCAPS_AMD_ALLOC_FIX) && + !get_dma_ops(chip->card->dev)) + snoop = false; +#endif + chip->snoop = snoop; if (!snoop) { dev_info(chip->card->dev, "Force to non-snoop mode\n"); diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 17389a3801bd..f030669243f9 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -21,12 +21,6 @@ #include "hda_jack.h" #include "hda_generic.h" -enum { - CX_HEADSET_NOPRESENT = 0, - CX_HEADSET_PARTPRESENT, - CX_HEADSET_ALLPRESENT, -}; - struct conexant_spec { struct hda_gen_spec gen; @@ -48,7 +42,6 @@ struct conexant_spec { unsigned int gpio_led; unsigned int gpio_mute_led_mask; unsigned int gpio_mic_led_mask; - unsigned int headset_present_flag; bool is_cx8070_sn6140; }; @@ -212,6 +205,8 @@ static void cx_auto_shutdown(struct hda_codec *codec) { struct conexant_spec *spec = codec->spec; + snd_hda_gen_shutup_speakers(codec); + /* Turn the problematic codec into D3 to avoid spurious noises from the internal speaker during (and after) reboot */ cx_auto_turn_eapd(codec, spec->num_eapds, spec->eapds, false); @@ -250,48 +245,19 @@ static void cx_process_headset_plugin(struct hda_codec *codec) } } -static void cx_update_headset_mic_vref(struct hda_codec *codec, unsigned int res) +static void cx_update_headset_mic_vref(struct hda_codec *codec, struct hda_jack_callback *event) { - unsigned int phone_present, mic_persent, phone_tag, mic_tag; - struct conexant_spec *spec = codec->spec; + unsigned int mic_present; /* In cx8070 and sn6140, the node 16 can only be config to headphone or disabled, * the node 19 can only be config to microphone or disabled. * Check hp&mic tag to process headset pulgin&plugout. */ - phone_tag = snd_hda_codec_read(codec, 0x16, 0, AC_VERB_GET_UNSOLICITED_RESPONSE, 0x0); - mic_tag = snd_hda_codec_read(codec, 0x19, 0, AC_VERB_GET_UNSOLICITED_RESPONSE, 0x0); - if ((phone_tag & (res >> AC_UNSOL_RES_TAG_SHIFT)) || - (mic_tag & (res >> AC_UNSOL_RES_TAG_SHIFT))) { - phone_present = snd_hda_codec_read(codec, 0x16, 0, AC_VERB_GET_PIN_SENSE, 0x0); - if (!(phone_present & AC_PINSENSE_PRESENCE)) {/* headphone plugout */ - spec->headset_present_flag = CX_HEADSET_NOPRESENT; - snd_hda_codec_write(codec, 0x19, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20); - return; - } - if (spec->headset_present_flag == CX_HEADSET_NOPRESENT) { - spec->headset_present_flag = CX_HEADSET_PARTPRESENT; - } else if (spec->headset_present_flag == CX_HEADSET_PARTPRESENT) { - mic_persent = snd_hda_codec_read(codec, 0x19, 0, - AC_VERB_GET_PIN_SENSE, 0x0); - /* headset is present */ - if ((phone_present & AC_PINSENSE_PRESENCE) && - (mic_persent & AC_PINSENSE_PRESENCE)) { - cx_process_headset_plugin(codec); - spec->headset_present_flag = CX_HEADSET_ALLPRESENT; - } - } - } -} - -static void cx_jack_unsol_event(struct hda_codec *codec, unsigned int res) -{ - struct conexant_spec *spec = codec->spec; - - if (spec->is_cx8070_sn6140) - cx_update_headset_mic_vref(codec, res); - - snd_hda_jack_unsol_event(codec, res); + mic_present = snd_hda_codec_read(codec, 0x19, 0, AC_VERB_GET_PIN_SENSE, 0x0); + if (!(mic_present & AC_PINSENSE_PRESENCE)) /* mic plugout */ + snd_hda_codec_write(codec, 0x19, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20); + else + cx_process_headset_plugin(codec); } static int cx_auto_suspend(struct hda_codec *codec) @@ -305,7 +271,7 @@ static const struct hda_codec_ops cx_auto_patch_ops = { .build_pcms = snd_hda_gen_build_pcms, .init = cx_auto_init, .free = cx_auto_free, - .unsol_event = cx_jack_unsol_event, + .unsol_event = snd_hda_jack_unsol_event, .suspend = cx_auto_suspend, .check_power_status = snd_hda_gen_check_power_status, }; @@ -1163,7 +1129,7 @@ static int patch_conexant_auto(struct hda_codec *codec) case 0x14f11f86: case 0x14f11f87: spec->is_cx8070_sn6140 = true; - spec->headset_present_flag = CX_HEADSET_NOPRESENT; + snd_hda_jack_detect_enable_callback(codec, 0x19, cx_update_headset_mic_vref); break; } diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ba0ce8750ca4..1645d21d422f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9872,6 +9872,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x079b, "Acer Aspire V5-573G", ALC282_FIXUP_ASPIRE_V5_PINS), SND_PCI_QUIRK(0x1025, 0x080d, "Acer Aspire V5-122P", ALC269_FIXUP_ASPIRE_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x0840, "Acer Aspire E1", ALC269VB_FIXUP_ASPIRE_E1_COEF), + SND_PCI_QUIRK(0x1025, 0x100c, "Acer Aspire E5-574G", ALC255_FIXUP_ACER_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1025, 0x101c, "Acer Veriton N2510G", ALC269_FIXUP_LIFEBOOK), SND_PCI_QUIRK(0x1025, 0x102b, "Acer Aspire C24-860", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1065, "Acer Aspire C20-820", ALC269VC_FIXUP_ACER_HEADSET_MIC), diff --git a/sound/usb/stream.c b/sound/usb/stream.c index d5409f387945..e14c725acebf 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -244,8 +244,8 @@ static struct snd_pcm_chmap_elem *convert_chmap(int channels, unsigned int bits, SNDRV_CHMAP_FR, /* right front */ SNDRV_CHMAP_FC, /* center front */ SNDRV_CHMAP_LFE, /* LFE */ - SNDRV_CHMAP_SL, /* left surround */ - SNDRV_CHMAP_SR, /* right surround */ + SNDRV_CHMAP_RL, /* left surround */ + SNDRV_CHMAP_RR, /* right surround */ SNDRV_CHMAP_FLC, /* left of center */ SNDRV_CHMAP_FRC, /* right of center */ SNDRV_CHMAP_RC, /* surround */ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 489cbed7e82a..12796808f07a 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -82,7 +82,30 @@ FILES= \ FILES := $(addprefix $(OUTPUT),$(FILES)) -PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config +# Some distros provide the command $(CROSS_COMPILE)pkg-config for +# searching packges installed with Multiarch. Use it for cross +# compilation if it is existed. +ifneq (, $(shell which $(CROSS_COMPILE)pkg-config)) + PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config +else + PKG_CONFIG ?= pkg-config + + # PKG_CONFIG_PATH or PKG_CONFIG_LIBDIR, alongside PKG_CONFIG_SYSROOT_DIR + # for modified system root, are required for the cross compilation. + # If these PKG_CONFIG environment variables are not set, Multiarch library + # paths are used instead. + ifdef CROSS_COMPILE + ifeq ($(PKG_CONFIG_LIBDIR)$(PKG_CONFIG_PATH)$(PKG_CONFIG_SYSROOT_DIR),) + CROSS_ARCH = $(shell $(CC) -dumpmachine) + PKG_CONFIG_LIBDIR := /usr/local/$(CROSS_ARCH)/lib/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/local/lib/$(CROSS_ARCH)/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/lib/$(CROSS_ARCH)/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/local/share/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/share/pkgconfig/ + export PKG_CONFIG_LIBDIR + endif + endif +endif all: $(FILES) @@ -147,7 +170,17 @@ $(OUTPUT)test-libopencsd.bin: DWARFLIBS := -ldw ifeq ($(findstring -static,${LDFLAGS}),-static) -DWARFLIBS += -lelf -lebl -lz -llzma -lbz2 + DWARFLIBS += -lelf -lz -llzma -lbz2 -lzstd + + LIBDW_VERSION := $(shell $(PKG_CONFIG) --modversion libdw) + LIBDW_VERSION_1 := $(word 1, $(subst ., ,$(LIBDW_VERSION))) + LIBDW_VERSION_2 := $(word 2, $(subst ., ,$(LIBDW_VERSION))) + + # Elfutils merged libebl.a into libdw.a starting from version 0.177, + # Link libebl.a only if libdw is older than this version. + ifeq ($(shell test $(LIBDW_VERSION_2) -lt 177; echo $$?),0) + DWARFLIBS += -lebl + endif endif $(OUTPUT)test-dwarf.bin: @@ -178,27 +211,27 @@ $(OUTPUT)test-numa_num_possible_cpus.bin: $(BUILD) -lnuma $(OUTPUT)test-libunwind.bin: - $(BUILD) -lelf + $(BUILD) -lelf -llzma $(OUTPUT)test-libunwind-debug-frame.bin: - $(BUILD) -lelf + $(BUILD) -lelf -llzma $(OUTPUT)test-libunwind-x86.bin: - $(BUILD) -lelf -lunwind-x86 + $(BUILD) -lelf -llzma -lunwind-x86 $(OUTPUT)test-libunwind-x86_64.bin: - $(BUILD) -lelf -lunwind-x86_64 + $(BUILD) -lelf -llzma -lunwind-x86_64 $(OUTPUT)test-libunwind-arm.bin: - $(BUILD) -lelf -lunwind-arm + $(BUILD) -lelf -llzma -lunwind-arm $(OUTPUT)test-libunwind-aarch64.bin: - $(BUILD) -lelf -lunwind-aarch64 + $(BUILD) -lelf -llzma -lunwind-aarch64 $(OUTPUT)test-libunwind-debug-frame-arm.bin: - $(BUILD) -lelf -lunwind-arm + $(BUILD) -lelf -llzma -lunwind-arm $(OUTPUT)test-libunwind-debug-frame-aarch64.bin: - $(BUILD) -lelf -lunwind-aarch64 + $(BUILD) -lelf -llzma -lunwind-aarch64 $(OUTPUT)test-libaudit.bin: $(BUILD) -laudit diff --git a/tools/perf/Documentation/Build.txt b/tools/perf/Documentation/Build.txt index 3766886c4bca..83dc87c662b6 100644 --- a/tools/perf/Documentation/Build.txt +++ b/tools/perf/Documentation/Build.txt @@ -71,3 +71,31 @@ supported by GCC. UBSan detects undefined behaviors of programs at runtime. $ UBSAN_OPTIONS=print_stacktrace=1 ./perf record -a If UBSan detects any problem at runtime, it outputs a “runtime error:” message. + +4) Cross compilation +==================== +As Multiarch is commonly supported in Linux distributions, we can install +libraries for multiple architectures on the same system and then cross-compile +Linux perf. For example, Aarch64 libraries and toolchains can be installed on +an x86_64 machine, allowing us to compile perf for an Aarch64 target. + +Below is the command for building the perf with dynamic linking. + + $ cd /path/to/Linux + $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- -C tools/perf + +For static linking, the option `LDFLAGS="-static"` is required. + + $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- \ + LDFLAGS="-static" -C tools/perf + +In the embedded system world, a use case is to explicitly specify the package +configuration paths for cross building: + + $ PKG_CONFIG_SYSROOT_DIR="/path/to/cross/build/sysroot" \ + PKG_CONFIG_LIBDIR="/usr/lib/:/usr/local/lib" \ + make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- -C tools/perf + +In this case, the variable PKG_CONFIG_SYSROOT_DIR can be used alongside the +variable PKG_CONFIG_LIBDIR or PKG_CONFIG_PATH to prepend the sysroot path to +the library paths for cross compilation. diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index c896babf7a74..fa679db61f62 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -152,7 +152,17 @@ ifdef LIBDW_DIR endif DWARFLIBS := -ldw ifeq ($(findstring -static,${LDFLAGS}),-static) - DWARFLIBS += -lelf -lebl -ldl -lz -llzma -lbz2 + DWARFLIBS += -lelf -ldl -lz -llzma -lbz2 -lzstd + + LIBDW_VERSION := $(shell $(PKG_CONFIG) --modversion libdw) + LIBDW_VERSION_1 := $(word 1, $(subst ., ,$(LIBDW_VERSION))) + LIBDW_VERSION_2 := $(word 2, $(subst ., ,$(LIBDW_VERSION))) + + # Elfutils merged libebl.a into libdw.a starting from version 0.177, + # Link libebl.a only if libdw is older than this version. + ifeq ($(shell test $(LIBDW_VERSION_2) -lt 177; echo $$?),0) + DWARFLIBS += -lebl + endif endif FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS) FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) $(DWARFLIBS) @@ -296,6 +306,11 @@ endif ifdef PYTHON_CONFIG PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) $(PYTHON_CONFIG_LDFLAGS) 2>/dev/null) + # Update the python flags for cross compilation + ifdef CROSS_COMPILE + PYTHON_NATIVE := $(shell echo $(PYTHON_EMBED_LDOPTS) | sed 's/\(-L.*\/\)\(.*-linux-gnu\).*/\2/') + PYTHON_EMBED_LDOPTS := $(subst $(PYTHON_NATIVE),$(shell $(CC) -dumpmachine),$(PYTHON_EMBED_LDOPTS)) + endif PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS)) PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --includes 2>/dev/null) @@ -897,6 +912,9 @@ else PYTHON_SETUPTOOLS_INSTALLED := $(shell $(PYTHON) -c 'import setuptools;' 2> /dev/null && echo "yes" || echo "no") ifeq ($(PYTHON_SETUPTOOLS_INSTALLED), yes) PYTHON_EXTENSION_SUFFIX := $(shell $(PYTHON) -c 'from importlib import machinery; print(machinery.EXTENSION_SUFFIXES[0])') + ifdef CROSS_COMPILE + PYTHON_EXTENSION_SUFFIX := $(subst $(PYTHON_NATIVE),$(shell $(CC) -dumpmachine),$(PYTHON_EXTENSION_SUFFIX)) + endif LANG_BINDINGS += $(obj-perf)python/perf$(PYTHON_EXTENSION_SUFFIX) else $(warning Missing python setuptools, the python binding won't be built, please install python3-setuptools or equivalent) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 175e4c7898f0..f8148db5fc38 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -193,7 +193,32 @@ HOSTLD ?= ld HOSTAR ?= ar CLANG ?= clang -PKG_CONFIG = $(CROSS_COMPILE)pkg-config +# Some distros provide the command $(CROSS_COMPILE)pkg-config for +# searching packges installed with Multiarch. Use it for cross +# compilation if it is existed. +ifneq (, $(shell which $(CROSS_COMPILE)pkg-config)) + PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config +else + PKG_CONFIG ?= pkg-config + + # PKG_CONFIG_PATH or PKG_CONFIG_LIBDIR, alongside PKG_CONFIG_SYSROOT_DIR + # for modified system root, is required for the cross compilation. + # If these PKG_CONFIG environment variables are not set, Multiarch library + # paths are used instead. + ifdef CROSS_COMPILE + ifeq ($(PKG_CONFIG_LIBDIR)$(PKG_CONFIG_PATH)$(PKG_CONFIG_SYSROOT_DIR),) + CROSS_ARCH = $(shell $(CC) -dumpmachine) + PKG_CONFIG_LIBDIR := /usr/local/$(CROSS_ARCH)/lib/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/local/lib/$(CROSS_ARCH)/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/lib/$(CROSS_ARCH)/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/local/share/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/share/pkgconfig/ + export PKG_CONFIG_LIBDIR + $(warning Missing PKG_CONFIG_LIBDIR, PKG_CONFIG_PATH and PKG_CONFIG_SYSROOT_DIR for cross compilation,) + $(warning set PKG_CONFIG_LIBDIR for using Multiarch libs.) + endif + endif +endif RM = rm -f LN = ln -f diff --git a/tools/perf/pmu-events/arch/riscv/andes/ax45/firmware.json b/tools/perf/pmu-events/arch/riscv/andes/ax45/firmware.json index 9b4a032186a7..7149caec4f80 100644 --- a/tools/perf/pmu-events/arch/riscv/andes/ax45/firmware.json +++ b/tools/perf/pmu-events/arch/riscv/andes/ax45/firmware.json @@ -36,7 +36,7 @@ "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" }, { - "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + "ArchStdEvent": "FW_SFENCE_VMA_ASID_SENT" }, { "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED" diff --git a/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json b/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json index a9939823b14b..0c9b9a2d2958 100644 --- a/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json +++ b/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json @@ -74,7 +74,7 @@ { "PublicDescription": "Sent SFENCE.VMA with ASID request to other HART event", "ConfigCode": "0x800000000000000c", - "EventName": "FW_SFENCE_VMA_RECEIVED", + "EventName": "FW_SFENCE_VMA_ASID_SENT", "BriefDescription": "Sent SFENCE.VMA with ASID request to other HART event" }, { diff --git a/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json b/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json index 9b4a032186a7..7149caec4f80 100644 --- a/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json +++ b/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json @@ -36,7 +36,7 @@ "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" }, { - "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + "ArchStdEvent": "FW_SFENCE_VMA_ASID_SENT" }, { "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED" diff --git a/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json b/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json index 9b4a032186a7..7149caec4f80 100644 --- a/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json +++ b/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json @@ -36,7 +36,7 @@ "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" }, { - "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + "ArchStdEvent": "FW_SFENCE_VMA_ASID_SENT" }, { "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED" diff --git a/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json index 9b4a032186a7..7149caec4f80 100644 --- a/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json +++ b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json @@ -36,7 +36,7 @@ "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" }, { - "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + "ArchStdEvent": "FW_SFENCE_VMA_ASID_SENT" }, { "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED" diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 1730b852a947..6d075648d2cc 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -1141,7 +1141,7 @@ int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *samp int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node, bool hide_unresolved) { - struct machine *machine = maps__machine(node->ms.maps); + struct machine *machine = node->ms.maps ? maps__machine(node->ms.maps) : NULL; maps__put(al->maps); al->maps = maps__get(node->ms.maps); diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index dd49c1d23a60..81d4757ecd4c 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -713,7 +713,7 @@ $(OUTPUT)/xdp_features: xdp_features.c $(OUTPUT)/network_helpers.o $(OUTPUT)/xdp # Make sure we are able to include and link libbpf against c++. $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ) $(call msg,CXX,,$@) - $(Q)$(CXX) $(CFLAGS) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@ + $(Q)$(CXX) $(subst -D_GNU_SOURCE=,,$(CFLAGS)) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@ # Benchmark runner $(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(BPFOBJ) diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c index bd8c75b620c2..c397336fe1ed 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c @@ -216,7 +216,7 @@ static void test_uretprobe_regs_change(void) } #ifndef __NR_uretprobe -#define __NR_uretprobe 467 +#define __NR_uretprobe 335 #endif __naked unsigned long uretprobe_syscall_call_1(void) @@ -253,7 +253,7 @@ static void test_uretprobe_syscall_call(void) struct uprobe_syscall_executed *skel; int pid, status, err, go[2], c; - if (ASSERT_OK(pipe(go), "pipe")) + if (!ASSERT_OK(pipe(go), "pipe")) return; skel = uprobe_syscall_executed__open_and_load(); diff --git a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c index 5f541522364f..5d0a809dc2df 100644 --- a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c +++ b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c @@ -29,9 +29,11 @@ static int check_vgem(int fd) version.name = name; ret = ioctl(fd, DRM_IOCTL_VERSION, &version); - if (ret) + if (ret || version.name_len != 4) return 0; + name[4] = '\0'; + return !strcmp(name, "vgem"); } diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index 931dbc36ca43..011508ca604b 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -19,6 +19,15 @@ def _rss_key_rand(length): return [random.randint(0, 255) for _ in range(length)] +def _rss_key_check(cfg, data=None, context=0): + if data is None: + data = get_rss(cfg, context=context) + if 'rss-hash-key' not in data: + return + non_zero = [x for x in data['rss-hash-key'] if x != 0] + ksft_eq(bool(non_zero), True, comment=f"RSS key is all zero {data['rss-hash-key']}") + + def get_rss(cfg, context=0): return ethtool(f"-x {cfg.ifname} context {context}", json=True)[0] @@ -90,8 +99,9 @@ def _send_traffic_check(cfg, port, name, params): def test_rss_key_indir(cfg): """Test basics like updating the main RSS key and indirection table.""" - if len(_get_rx_cnts(cfg)) < 2: - KsftSkipEx("Device has only one queue (or doesn't support queue stats)") + qcnt = len(_get_rx_cnts(cfg)) + if qcnt < 3: + KsftSkipEx("Device has fewer than 3 queues (or doesn't support queue stats)") data = get_rss(cfg) want_keys = ['rss-hash-key', 'rss-hash-function', 'rss-indirection-table'] @@ -101,6 +111,7 @@ def test_rss_key_indir(cfg): if not data[k]: raise KsftFailEx(f"ethtool results empty for '{k}': {data[k]}") + _rss_key_check(cfg, data=data) key_len = len(data['rss-hash-key']) # Set the key @@ -110,9 +121,26 @@ def test_rss_key_indir(cfg): data = get_rss(cfg) ksft_eq(key, data['rss-hash-key']) + # Set the indirection table and the key together + key = _rss_key_rand(key_len) + ethtool(f"-X {cfg.ifname} equal 3 hkey " + _rss_key_str(key)) + reset_indir = defer(ethtool, f"-X {cfg.ifname} default") + + data = get_rss(cfg) + _rss_key_check(cfg, data=data) + ksft_eq(0, min(data['rss-indirection-table'])) + ksft_eq(2, max(data['rss-indirection-table'])) + + # Reset indirection table and set the key + key = _rss_key_rand(key_len) + ethtool(f"-X {cfg.ifname} default hkey " + _rss_key_str(key)) + data = get_rss(cfg) + _rss_key_check(cfg, data=data) + ksft_eq(0, min(data['rss-indirection-table'])) + ksft_eq(qcnt - 1, max(data['rss-indirection-table'])) + # Set the indirection table ethtool(f"-X {cfg.ifname} equal 2") - reset_indir = defer(ethtool, f"-X {cfg.ifname} default") data = get_rss(cfg) ksft_eq(0, min(data['rss-indirection-table'])) ksft_eq(1, max(data['rss-indirection-table'])) @@ -317,8 +345,11 @@ def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None): ctx_cnt = i break + _rss_key_check(cfg, context=ctx_id) + if not create_with_cfg: ethtool(f"-X {cfg.ifname} context {ctx_id} {want_cfg}") + _rss_key_check(cfg, context=ctx_id) # Sanity check the context we just created data = get_rss(cfg, ctx_id) diff --git a/tools/testing/selftests/hid/hid_bpf.c b/tools/testing/selftests/hid/hid_bpf.c index dc0408a831d0..75b7b4ef6cfa 100644 --- a/tools/testing/selftests/hid/hid_bpf.c +++ b/tools/testing/selftests/hid/hid_bpf.c @@ -532,6 +532,7 @@ static void load_programs(const struct test_program programs[], FIXTURE_DATA(hid_bpf) * self, const FIXTURE_VARIANT(hid_bpf) * variant) { + struct bpf_map *iter_map; int err = -EINVAL; ASSERT_LE(progs_count, ARRAY_SIZE(self->hid_links)) @@ -564,6 +565,13 @@ static void load_programs(const struct test_program programs[], *ops_hid_id = self->hid_id; } + /* we disable the auto-attach feature of all maps because we + * only want the tested one to be manually attached in the next + * call to bpf_map__attach_struct_ops() + */ + bpf_object__for_each_map(iter_map, *self->skel->skeleton->obj) + bpf_map__set_autoattach(iter_map, false); + err = hid__load(self->skel); ASSERT_OK(err) TH_LOG("hid_skel_load failed: %d", err); @@ -687,6 +695,24 @@ TEST_F(hid_bpf, subprog_raw_event) } /* + * Attach hid_first_event to the given uhid device, + * attempt at re-attaching it, we should not lock and + * return an invalid struct bpf_link + */ +TEST_F(hid_bpf, multiple_attach) +{ + const struct test_program progs[] = { + { .name = "hid_first_event" }, + }; + struct bpf_link *link; + + LOAD_PROGRAMS(progs); + + link = bpf_map__attach_struct_ops(self->skel->maps.first_event); + ASSERT_NULL(link) TH_LOG("unexpected return value when re-attaching the struct_ops"); +} + +/* * Ensures that we can attach/detach programs */ TEST_F(hid_bpf, test_attach_detach) diff --git a/tools/testing/selftests/hid/progs/hid.c b/tools/testing/selftests/hid/progs/hid.c index ee9bbbcf751b..5ecc845ef792 100644 --- a/tools/testing/selftests/hid/progs/hid.c +++ b/tools/testing/selftests/hid/progs/hid.c @@ -455,7 +455,7 @@ struct { __type(value, struct elem); } hmap SEC(".maps"); -static int wq_cb_sleepable(void *map, int *key, struct bpf_wq *work) +static int wq_cb_sleepable(void *map, int *key, void *work) { __u8 buf[9] = {2, 3, 4, 5, 6, 7, 8, 9, 10}; struct hid_bpf_ctx *hid_ctx; diff --git a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h index cfe37f491906..e5db897586bb 100644 --- a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h +++ b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h @@ -114,7 +114,7 @@ extern int hid_bpf_try_input_report(struct hid_bpf_ctx *ctx, extern int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags) __weak __ksym; extern int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) __weak __ksym; extern int bpf_wq_set_callback_impl(struct bpf_wq *wq, - int (callback_fn)(void *map, int *key, struct bpf_wq *wq), + int (callback_fn)(void *map, int *key, void *wq), unsigned int flags__k, void *aux__ign) __ksym; #define bpf_wq_set_callback(timer, cb, flags) \ bpf_wq_set_callback_impl(timer, cb, flags, NULL) diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index f92c2fb23fcd..8e34f7fa44e9 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -961,10 +961,10 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zbkb, ZBKB); KVM_ISA_EXT_SIMPLE_CONFIG(zbkc, ZBKC); KVM_ISA_EXT_SIMPLE_CONFIG(zbkx, ZBKX); KVM_ISA_EXT_SIMPLE_CONFIG(zbs, ZBS); -KVM_ISA_EXT_SIMPLE_CONFIG(zca, ZCA), -KVM_ISA_EXT_SIMPLE_CONFIG(zcb, ZCB), -KVM_ISA_EXT_SIMPLE_CONFIG(zcd, ZCD), -KVM_ISA_EXT_SIMPLE_CONFIG(zcf, ZCF), +KVM_ISA_EXT_SIMPLE_CONFIG(zca, ZCA); +KVM_ISA_EXT_SIMPLE_CONFIG(zcb, ZCB); +KVM_ISA_EXT_SIMPLE_CONFIG(zcd, ZCD); +KVM_ISA_EXT_SIMPLE_CONFIG(zcf, ZCF); KVM_ISA_EXT_SIMPLE_CONFIG(zcmop, ZCMOP); KVM_ISA_EXT_SIMPLE_CONFIG(zfa, ZFA); KVM_ISA_EXT_SIMPLE_CONFIG(zfh, ZFH); diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index d2043ec3bf6d..4209b9569039 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -1115,11 +1115,11 @@ again: return 1; } - if (--cfg_repeat > 0) { - if (cfg_input) - close(fd); + if (cfg_input) + close(fd); + + if (--cfg_repeat > 0) goto again; - } return 0; } diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 108aeeb84ef1..4df48f1f14ab 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -661,7 +661,7 @@ pm_nl_check_endpoint() done if [ -z "${id}" ]; then - test_fail "bad test - missing endpoint id" + fail_test "bad test - missing endpoint id" return fi @@ -1634,6 +1634,8 @@ chk_prio_nr() { local mp_prio_nr_tx=$1 local mp_prio_nr_rx=$2 + local mpj_syn=$3 + local mpj_syn_ack=$4 local count print_check "ptx" @@ -1655,6 +1657,26 @@ chk_prio_nr() else print_ok fi + + print_check "syn backup" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinSynBackupRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$mpj_syn" ]; then + fail_test "got $count JOIN[s] syn with Backup expected $mpj_syn" + else + print_ok + fi + + print_check "synack backup" + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckBackupRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$mpj_syn_ack" ]; then + fail_test "got $count JOIN[s] synack with Backup expected $mpj_syn_ack" + else + print_ok + fi } chk_subflow_nr() @@ -2612,33 +2634,46 @@ backup_tests() sflags=nobackup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 - chk_prio_nr 0 1 + chk_prio_nr 0 1 1 0 fi # single address, backup if reset "single address, backup" && continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup + pm_nl_set_limits $ns2 1 1 + sflags=nobackup speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + chk_prio_nr 1 0 0 1 + fi + + # single address, switch to backup + if reset "single address, switch to backup" && + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + pm_nl_set_limits $ns1 0 1 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal pm_nl_set_limits $ns2 1 1 sflags=backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 chk_add_nr 1 1 - chk_prio_nr 1 1 + chk_prio_nr 1 1 0 0 fi # single address with port, backup if reset "single address with port, backup" && continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then pm_nl_set_limits $ns1 0 1 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup port 10100 pm_nl_set_limits $ns2 1 1 - sflags=backup speed=slow \ + sflags=nobackup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 chk_add_nr 1 1 - chk_prio_nr 1 1 + chk_prio_nr 1 0 0 1 fi if reset "mpc backup" && @@ -2647,17 +2682,26 @@ backup_tests() speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 - chk_prio_nr 0 1 + chk_prio_nr 0 1 0 0 fi if reset "mpc backup both sides" && continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then - pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint $ns1 10.0.1.1 flags signal,backup pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup + + # 10.0.2.2 (non-backup) -> 10.0.1.1 (backup) + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow + # 10.0.1.2 (backup) -> 10.0.2.1 (non-backup) + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + ip -net "$ns2" route add 10.0.2.1 via 10.0.1.1 dev ns2eth1 # force this path + speed=slow \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 - chk_prio_nr 1 1 + chk_join_nr 2 2 2 + chk_prio_nr 1 1 1 1 fi if reset "mpc switch to backup" && @@ -2666,7 +2710,7 @@ backup_tests() sflags=backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 - chk_prio_nr 0 1 + chk_prio_nr 0 1 0 0 fi if reset "mpc switch to backup both sides" && @@ -2676,7 +2720,7 @@ backup_tests() sflags=backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 - chk_prio_nr 1 1 + chk_prio_nr 1 1 0 0 fi } @@ -3053,7 +3097,7 @@ fullmesh_tests() addr_nr_ns2=1 sflags=backup,fullmesh speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 - chk_prio_nr 0 1 + chk_prio_nr 0 1 1 0 chk_rm_nr 0 1 fi @@ -3066,7 +3110,7 @@ fullmesh_tests() sflags=nobackup,nofullmesh speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 - chk_prio_nr 0 1 + chk_prio_nr 0 1 1 0 chk_rm_nr 0 1 fi } @@ -3318,7 +3362,7 @@ userspace_tests() sflags=backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 0 - chk_prio_nr 0 0 + chk_prio_nr 0 0 0 0 fi # userspace pm type prevents rm_addr @@ -3526,6 +3570,35 @@ endpoint_tests() chk_mptcp_info subflows 1 subflows 1 mptcp_lib_kill_wait $tests_pid fi + + # remove and re-add + if reset "delete re-add signal" && + mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + pm_nl_set_limits $ns1 1 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal + test_linkfail=4 speed=20 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! + + wait_mpj $ns2 + pm_nl_check_endpoint "creation" \ + $ns1 10.0.2.1 id 1 flags signal + chk_subflow_nr "before delete" 2 + chk_mptcp_info subflows 1 subflows 1 + + pm_nl_del_endpoint $ns1 1 10.0.2.1 + sleep 0.5 + chk_subflow_nr "after delete" 1 + chk_mptcp_info subflows 0 subflows 0 + + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + wait_mpj $ns2 + chk_subflow_nr "after re-add" 2 + chk_mptcp_info subflows 1 subflows 1 + mptcp_lib_kill_wait $tests_pid + fi + } # [$1: error message] diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index b14e14cdbfb9..fd6a3010afa8 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -113,10 +113,10 @@ config KVM_GENERIC_PRIVATE_MEM select KVM_PRIVATE_MEM bool -config HAVE_KVM_GMEM_PREPARE +config HAVE_KVM_ARCH_GMEM_PREPARE bool depends on KVM_PRIVATE_MEM -config HAVE_KVM_GMEM_INVALIDATE +config HAVE_KVM_ARCH_GMEM_INVALIDATE bool depends on KVM_PRIVATE_MEM diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index 1c509c351261..8f079a61a56d 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -13,84 +13,93 @@ struct kvm_gmem { struct list_head entry; }; -static int kvm_gmem_prepare_folio(struct inode *inode, pgoff_t index, struct folio *folio) +/** + * folio_file_pfn - like folio_file_page, but return a pfn. + * @folio: The folio which contains this index. + * @index: The index we want to look up. + * + * Return: The pfn for this index. + */ +static inline kvm_pfn_t folio_file_pfn(struct folio *folio, pgoff_t index) { -#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE - struct list_head *gmem_list = &inode->i_mapping->i_private_list; - struct kvm_gmem *gmem; + return folio_pfn(folio) + (index & (folio_nr_pages(folio) - 1)); +} - list_for_each_entry(gmem, gmem_list, entry) { - struct kvm_memory_slot *slot; - struct kvm *kvm = gmem->kvm; - struct page *page; - kvm_pfn_t pfn; - gfn_t gfn; - int rc; - - if (!kvm_arch_gmem_prepare_needed(kvm)) - continue; - - slot = xa_load(&gmem->bindings, index); - if (!slot) - continue; - - page = folio_file_page(folio, index); - pfn = page_to_pfn(page); - gfn = slot->base_gfn + index - slot->gmem.pgoff; - rc = kvm_arch_gmem_prepare(kvm, gfn, pfn, compound_order(compound_head(page))); - if (rc) { - pr_warn_ratelimited("gmem: Failed to prepare folio for index %lx GFN %llx PFN %llx error %d.\n", - index, gfn, pfn, rc); - return rc; - } +static int __kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot, + pgoff_t index, struct folio *folio) +{ +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE + kvm_pfn_t pfn = folio_file_pfn(folio, index); + gfn_t gfn = slot->base_gfn + index - slot->gmem.pgoff; + int rc = kvm_arch_gmem_prepare(kvm, gfn, pfn, folio_order(folio)); + if (rc) { + pr_warn_ratelimited("gmem: Failed to prepare folio for index %lx GFN %llx PFN %llx error %d.\n", + index, gfn, pfn, rc); + return rc; } - #endif + return 0; } -static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, bool prepare) +static inline void kvm_gmem_mark_prepared(struct folio *folio) { - struct folio *folio; + folio_mark_uptodate(folio); +} - /* TODO: Support huge pages. */ - folio = filemap_grab_folio(inode->i_mapping, index); - if (IS_ERR(folio)) - return folio; +/* + * Process @folio, which contains @gfn, so that the guest can use it. + * The folio must be locked and the gfn must be contained in @slot. + * On successful return the guest sees a zero page so as to avoid + * leaking host data and the up-to-date flag is set. + */ +static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot, + gfn_t gfn, struct folio *folio) +{ + unsigned long nr_pages, i; + pgoff_t index; + int r; + + nr_pages = folio_nr_pages(folio); + for (i = 0; i < nr_pages; i++) + clear_highpage(folio_page(folio, i)); /* - * Use the up-to-date flag to track whether or not the memory has been - * zeroed before being handed off to the guest. There is no backing - * storage for the memory, so the folio will remain up-to-date until - * it's removed. + * Preparing huge folios should always be safe, since it should + * be possible to split them later if needed. * - * TODO: Skip clearing pages when trusted firmware will do it when - * assigning memory to the guest. + * Right now the folio order is always going to be zero, but the + * code is ready for huge folios. The only assumption is that + * the base pgoff of memslots is naturally aligned with the + * requested page order, ensuring that huge folios can also use + * huge page table entries for GPA->HPA mapping. + * + * The order will be passed when creating the guest_memfd, and + * checked when creating memslots. */ - if (!folio_test_uptodate(folio)) { - unsigned long nr_pages = folio_nr_pages(folio); - unsigned long i; - - for (i = 0; i < nr_pages; i++) - clear_highpage(folio_page(folio, i)); - - folio_mark_uptodate(folio); - } + WARN_ON(!IS_ALIGNED(slot->gmem.pgoff, 1 << folio_order(folio))); + index = gfn - slot->base_gfn + slot->gmem.pgoff; + index = ALIGN_DOWN(index, 1 << folio_order(folio)); + r = __kvm_gmem_prepare_folio(kvm, slot, index, folio); + if (!r) + kvm_gmem_mark_prepared(folio); - if (prepare) { - int r = kvm_gmem_prepare_folio(inode, index, folio); - if (r < 0) { - folio_unlock(folio); - folio_put(folio); - return ERR_PTR(r); - } - } + return r; +} - /* - * Ignore accessed, referenced, and dirty flags. The memory is - * unevictable and there is no storage to write back to. - */ - return folio; +/* + * Returns a locked folio on success. The caller is responsible for + * setting the up-to-date flag before the memory is mapped into the guest. + * There is no backing storage for the memory, so the folio will remain + * up-to-date until it's removed. + * + * Ignore accessed, referenced, and dirty flags. The memory is + * unevictable and there is no storage to write back to. + */ +static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index) +{ + /* TODO: Support huge pages. */ + return filemap_grab_folio(inode->i_mapping, index); } static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start, @@ -190,7 +199,7 @@ static long kvm_gmem_allocate(struct inode *inode, loff_t offset, loff_t len) break; } - folio = kvm_gmem_get_folio(inode, index, true); + folio = kvm_gmem_get_folio(inode, index); if (IS_ERR(folio)) { r = PTR_ERR(folio); break; @@ -343,7 +352,7 @@ static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *fol return MF_DELAYED; } -#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE static void kvm_gmem_free_folio(struct folio *folio) { struct page *page = folio_page(folio, 0); @@ -358,7 +367,7 @@ static const struct address_space_operations kvm_gmem_aops = { .dirty_folio = noop_dirty_folio, .migrate_folio = kvm_gmem_migrate_folio, .error_remove_folio = kvm_gmem_error_folio, -#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE .free_folio = kvm_gmem_free_folio, #endif }; @@ -541,64 +550,76 @@ void kvm_gmem_unbind(struct kvm_memory_slot *slot) fput(file); } -static int __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot, - gfn_t gfn, kvm_pfn_t *pfn, int *max_order, bool prepare) +/* Returns a locked folio on success. */ +static struct folio * +__kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot, + gfn_t gfn, kvm_pfn_t *pfn, bool *is_prepared, + int *max_order) { pgoff_t index = gfn - slot->base_gfn + slot->gmem.pgoff; struct kvm_gmem *gmem = file->private_data; struct folio *folio; - struct page *page; - int r; if (file != slot->gmem.file) { WARN_ON_ONCE(slot->gmem.file); - return -EFAULT; + return ERR_PTR(-EFAULT); } gmem = file->private_data; if (xa_load(&gmem->bindings, index) != slot) { WARN_ON_ONCE(xa_load(&gmem->bindings, index)); - return -EIO; + return ERR_PTR(-EIO); } - folio = kvm_gmem_get_folio(file_inode(file), index, prepare); + folio = kvm_gmem_get_folio(file_inode(file), index); if (IS_ERR(folio)) - return PTR_ERR(folio); + return folio; if (folio_test_hwpoison(folio)) { folio_unlock(folio); folio_put(folio); - return -EHWPOISON; + return ERR_PTR(-EHWPOISON); } - page = folio_file_page(folio, index); - - *pfn = page_to_pfn(page); + *pfn = folio_file_pfn(folio, index); if (max_order) *max_order = 0; - r = 0; - - folio_unlock(folio); - - return r; + *is_prepared = folio_test_uptodate(folio); + return folio; } int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, kvm_pfn_t *pfn, int *max_order) { struct file *file = kvm_gmem_get_file(slot); - int r; + struct folio *folio; + bool is_prepared = false; + int r = 0; if (!file) return -EFAULT; - r = __kvm_gmem_get_pfn(file, slot, gfn, pfn, max_order, true); + folio = __kvm_gmem_get_pfn(file, slot, gfn, pfn, &is_prepared, max_order); + if (IS_ERR(folio)) { + r = PTR_ERR(folio); + goto out; + } + + if (!is_prepared) + r = kvm_gmem_prepare_folio(kvm, slot, gfn, folio); + + folio_unlock(folio); + if (r < 0) + folio_put(folio); + +out: fput(file); return r; } EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn); +#ifdef CONFIG_KVM_GENERIC_PRIVATE_MEM long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages, kvm_gmem_populate_cb post_populate, void *opaque) { @@ -625,7 +646,9 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages = min_t(ulong, slot->npages - (start_gfn - slot->base_gfn), npages); for (i = 0; i < npages; i += (1 << max_order)) { + struct folio *folio; gfn_t gfn = start_gfn + i; + bool is_prepared = false; kvm_pfn_t pfn; if (signal_pending(current)) { @@ -633,18 +656,39 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long break; } - ret = __kvm_gmem_get_pfn(file, slot, gfn, &pfn, &max_order, false); - if (ret) + folio = __kvm_gmem_get_pfn(file, slot, gfn, &pfn, &is_prepared, &max_order); + if (IS_ERR(folio)) { + ret = PTR_ERR(folio); break; + } - if (!IS_ALIGNED(gfn, (1 << max_order)) || - (npages - i) < (1 << max_order)) - max_order = 0; + if (is_prepared) { + folio_unlock(folio); + folio_put(folio); + ret = -EEXIST; + break; + } + + folio_unlock(folio); + WARN_ON(!IS_ALIGNED(gfn, 1 << max_order) || + (npages - i) < (1 << max_order)); + + ret = -EINVAL; + while (!kvm_range_has_memory_attributes(kvm, gfn, gfn + (1 << max_order), + KVM_MEMORY_ATTRIBUTE_PRIVATE, + KVM_MEMORY_ATTRIBUTE_PRIVATE)) { + if (!max_order) + goto put_folio_and_exit; + max_order--; + } p = src ? src + i * PAGE_SIZE : NULL; ret = post_populate(kvm, gfn, pfn, p, max_order, opaque); + if (!ret) + kvm_gmem_mark_prepared(folio); - put_page(pfn_to_page(pfn)); +put_folio_and_exit: + folio_put(folio); if (ret) break; } @@ -655,3 +699,4 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long return ret && !i ? ret : i; } EXPORT_SYMBOL_GPL(kvm_gmem_populate); +#endif diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d0788d0a72cc..92901656a0d4 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2398,48 +2398,47 @@ static int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, #endif /* CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */ #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES +static u64 kvm_supported_mem_attributes(struct kvm *kvm) +{ + if (!kvm || kvm_arch_has_private_mem(kvm)) + return KVM_MEMORY_ATTRIBUTE_PRIVATE; + + return 0; +} + /* * Returns true if _all_ gfns in the range [@start, @end) have attributes - * matching @attrs. + * such that the bits in @mask match @attrs. */ bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end, - unsigned long attrs) + unsigned long mask, unsigned long attrs) { XA_STATE(xas, &kvm->mem_attr_array, start); unsigned long index; - bool has_attrs; void *entry; - rcu_read_lock(); + mask &= kvm_supported_mem_attributes(kvm); + if (attrs & ~mask) + return false; - if (!attrs) { - has_attrs = !xas_find(&xas, end - 1); - goto out; - } + if (end == start + 1) + return (kvm_get_memory_attributes(kvm, start) & mask) == attrs; + + guard(rcu)(); + if (!attrs) + return !xas_find(&xas, end - 1); - has_attrs = true; for (index = start; index < end; index++) { do { entry = xas_next(&xas); } while (xas_retry(&xas, entry)); - if (xas.xa_index != index || xa_to_value(entry) != attrs) { - has_attrs = false; - break; - } + if (xas.xa_index != index || + (xa_to_value(entry) & mask) != attrs) + return false; } -out: - rcu_read_unlock(); - return has_attrs; -} - -static u64 kvm_supported_mem_attributes(struct kvm *kvm) -{ - if (!kvm || kvm_arch_has_private_mem(kvm)) - return KVM_MEMORY_ATTRIBUTE_PRIVATE; - - return 0; + return true; } static __always_inline void kvm_handle_gfn_range(struct kvm *kvm, @@ -2534,7 +2533,7 @@ static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end, mutex_lock(&kvm->slots_lock); /* Nothing to do if the entire range as the desired attributes. */ - if (kvm_range_has_memory_attributes(kvm, start, end, attributes)) + if (kvm_range_has_memory_attributes(kvm, start, end, ~0, attributes)) goto out_unlock; /* |