diff options
350 files changed, 3949 insertions, 1320 deletions
@@ -107,6 +107,7 @@ Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@ascom.ch> Maciej W. Rozycki <macro@mips.com> <macro@imgtec.com> Marcin Nowakowski <marcin.nowakowski@mips.com> <marcin.nowakowski@imgtec.com> Mark Brown <broonie@sirena.org.uk> +Mark Yao <markyao0591@gmail.com> <mark.yao@rock-chips.com> Martin Kepplinger <martink@posteo.de> <martin.kepplinger@theobroma-systems.com> Martin Kepplinger <martink@posteo.de> <martin.kepplinger@ginzinger.com> Matthieu CASTET <castet.matthieu@free.fr> diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst index b2598cc9834c..7242cbda15dd 100644 --- a/Documentation/admin-guide/kernel-parameters.rst +++ b/Documentation/admin-guide/kernel-parameters.rst @@ -109,6 +109,7 @@ parameter is applicable:: IPV6 IPv6 support is enabled. ISAPNP ISA PnP code is enabled. ISDN Appropriate ISDN support is enabled. + ISOL CPU Isolation is enabled. JOY Appropriate joystick support is enabled. KGDB Kernel debugger support is enabled. KVM Kernel Virtual Machine support is enabled. diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 6571fbfdb2a1..af7104aaffd9 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -328,11 +328,15 @@ not play well with APC CPU idle - disable it if you have APC and your system crashes randomly. - apic= [APIC,X86-32] Advanced Programmable Interrupt Controller + apic= [APIC,X86] Advanced Programmable Interrupt Controller Change the output verbosity whilst booting Format: { quiet (default) | verbose | debug } Change the amount of debugging information output when initialising the APIC and IO-APIC components. + For X86-32, this can also be used to specify an APIC + driver name. + Format: apic=driver_name + Examples: apic=bigsmp apic_extnmi= [APIC,X86] External NMI delivery setting Format: { bsp (default) | all | none } @@ -1737,7 +1741,7 @@ isapnp= [ISAPNP] Format: <RDP>,<reset>,<pci_scan>,<verbosity> - isolcpus= [KNL,SMP] Isolate a given set of CPUs from disturbance. + isolcpus= [KNL,SMP,ISOL] Isolate a given set of CPUs from disturbance. [Deprecated - use cpusets instead] Format: [flag-list,]<cpu-list> @@ -2662,7 +2666,7 @@ Valid arguments: on, off Default: on - nohz_full= [KNL,BOOT] + nohz_full= [KNL,BOOT,SMP,ISOL] The argument is a cpu list, as described above. In kernels built with CONFIG_NO_HZ_FULL=y, set the specified list of CPUs whose tick will be stopped @@ -2708,6 +2712,8 @@ steal time is computed, but won't influence scheduler behaviour + nopti [X86-64] Disable kernel page table isolation + nolapic [X86-32,APIC] Do not enable or use the local APIC. nolapic_timer [X86-32,APIC] Do not use the local APIC timer. @@ -3282,6 +3288,12 @@ pt. [PARIDE] See Documentation/blockdev/paride.txt. + pti= [X86_64] + Control user/kernel address space isolation: + on - enable + off - disable + auto - default setting + pty.legacy_count= [KNL] Number of legacy pty's. Overwrites compiled-in default number. diff --git a/Documentation/admin-guide/thunderbolt.rst b/Documentation/admin-guide/thunderbolt.rst index de50a8561774..9b55952039a6 100644 --- a/Documentation/admin-guide/thunderbolt.rst +++ b/Documentation/admin-guide/thunderbolt.rst @@ -230,7 +230,7 @@ If supported by your machine this will be exposed by the WMI bus with a sysfs attribute called "force_power". For example the intel-wmi-thunderbolt driver exposes this attribute in: - /sys/devices/platform/PNP0C14:00/wmi_bus/wmi_bus-PNP0C14:00/86CCFD48-205E-4A77-9C48-2021CBEDE341/force_power + /sys/bus/wmi/devices/86CCFD48-205E-4A77-9C48-2021CBEDE341/force_power To force the power to on, write 1 to this attribute file. To disable force power, write 0 to this attribute file. diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index 2e7ee0313c1c..e94d3ac2bdd0 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -341,10 +341,7 @@ GuC GuC-specific firmware loader ---------------------------- -.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_loader.c - :doc: GuC-specific firmware loader - -.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_loader.c +.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_fw.c :internal: GuC-based command submission diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index 51101708a03a..ea91cb61a602 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -12,7 +12,9 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB) ... unused hole ... ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB) ... unused hole ... -fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping + vaddr_end for KASLR +fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping +fffffe8000000000 - fffffeffffffffff (=39 bits) LDT remap for PTI ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks ... unused hole ... ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space @@ -29,20 +31,22 @@ Virtual memory map with 5 level page tables: hole caused by [56:63] sign extension ff00000000000000 - ff0fffffffffffff (=52 bits) guard hole, reserved for hypervisor ff10000000000000 - ff8fffffffffffff (=55 bits) direct mapping of all phys. memory -ff90000000000000 - ff91ffffffffffff (=49 bits) hole -ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space +ff90000000000000 - ff9fffffffffffff (=52 bits) LDT remap for PTI +ffa0000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space (12800 TB) ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB) ... unused hole ... ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB) ... unused hole ... -fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping + vaddr_end for KASLR +fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping +... unused hole ... ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks ... unused hole ... ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space ... unused hole ... ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0 -ffffffffa0000000 - [fixmap start] (~1526 MB) module mapping space +ffffffffa0000000 - fffffffffeffffff (1520 MB) module mapping space [fixmap start] - ffffffffff5fffff kernel-internal fixmap range ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole @@ -66,9 +70,10 @@ memory window (this size is arbitrary, it can be raised later if needed). The mappings are not part of any other kernel PGD and are only available during EFI runtime calls. -The module mapping space size changes based on the CONFIG requirements for the -following fixmap section. - Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all physical memory, vmalloc/ioremap space and virtual memory map are randomized. Their order is preserved but their base will be offset early at boot time. + +Be very careful vs. KASLR when changing anything here. The KASLR address +range must not overlap with anything except the KASAN shadow area, which is +correct as KASAN disables KASLR. diff --git a/MAINTAINERS b/MAINTAINERS index 753799d24cd9..e81d91f7cd4e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2622,24 +2622,22 @@ F: fs/bfs/ F: include/uapi/linux/bfs_fs.h BLACKFIN ARCHITECTURE -M: Steven Miao <realmz6@gmail.com> L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) T: git git://git.code.sf.net/p/adi-linux/code W: http://blackfin.uclinux.org -S: Supported +S: Orphan F: arch/blackfin/ BLACKFIN EMAC DRIVER L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) W: http://blackfin.uclinux.org -S: Supported +S: Orphan F: drivers/net/ethernet/adi/ BLACKFIN MEDIA DRIVER -M: Scott Jiang <scott.jiang.linux@gmail.com> L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) W: http://blackfin.uclinux.org/ -S: Supported +S: Orphan F: drivers/media/platform/blackfin/ F: drivers/media/i2c/adv7183* F: drivers/media/i2c/vs6624* @@ -2647,25 +2645,25 @@ F: drivers/media/i2c/vs6624* BLACKFIN RTC DRIVER L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) W: http://blackfin.uclinux.org -S: Supported +S: Orphan F: drivers/rtc/rtc-bfin.c BLACKFIN SDH DRIVER L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) W: http://blackfin.uclinux.org -S: Supported +S: Orphan F: drivers/mmc/host/bfin_sdh.c BLACKFIN SERIAL DRIVER L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) W: http://blackfin.uclinux.org -S: Supported +S: Orphan F: drivers/tty/serial/bfin_uart.c BLACKFIN WATCHDOG DRIVER L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) W: http://blackfin.uclinux.org -S: Supported +S: Orphan F: drivers/watchdog/bfin_wdt.c BLINKM RGB LED DRIVER @@ -5154,15 +5152,15 @@ F: sound/usb/misc/ua101.c EFI TEST DRIVER L: linux-efi@vger.kernel.org M: Ivan Hu <ivan.hu@canonical.com> -M: Matt Fleming <matt@codeblueprint.co.uk> +M: Ard Biesheuvel <ard.biesheuvel@linaro.org> S: Maintained F: drivers/firmware/efi/test/ EFI VARIABLE FILESYSTEM M: Matthew Garrett <matthew.garrett@nebula.com> M: Jeremy Kerr <jk@ozlabs.org> -M: Matt Fleming <matt@codeblueprint.co.uk> -T: git git://git.kernel.org/pub/scm/linux/kernel/git/mfleming/efi.git +M: Ard Biesheuvel <ard.biesheuvel@linaro.org> +T: git git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git L: linux-efi@vger.kernel.org S: Maintained F: fs/efivarfs/ @@ -5323,7 +5321,6 @@ S: Supported F: security/integrity/evm/ EXTENSIBLE FIRMWARE INTERFACE (EFI) -M: Matt Fleming <matt@codeblueprint.co.uk> M: Ard Biesheuvel <ard.biesheuvel@linaro.org> L: linux-efi@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git @@ -10152,7 +10149,7 @@ F: drivers/irqchip/irq-ompic.c F: drivers/irqchip/irq-or1k-* OPENVSWITCH -M: Pravin Shelar <pshelar@nicira.com> +M: Pravin B Shelar <pshelar@ovn.org> L: netdev@vger.kernel.org L: dev@openvswitch.org W: http://openvswitch.org @@ -13508,6 +13505,7 @@ M: Mika Westerberg <mika.westerberg@linux.intel.com> M: Yehezkel Bernat <yehezkel.bernat@intel.com> T: git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git S: Maintained +F: Documentation/admin-guide/thunderbolt.rst F: drivers/thunderbolt/ F: include/linux/thunderbolt.h @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc7 NAME = Fearless Coyote # *DOCUMENTATION* @@ -789,6 +789,9 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-sign) # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow) +# Make sure -fstack-check isn't enabled (like gentoo apparently did) +KBUILD_CFLAGS += $(call cc-option,-fno-stack-check,) + # conserve stack if available KBUILD_CFLAGS += $(call cc-option,-fconserve-stack) diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi index 4e6e9f57e790..dc91c663bcc0 100644 --- a/arch/arc/boot/dts/axc003.dtsi +++ b/arch/arc/boot/dts/axc003.dtsi @@ -35,6 +35,14 @@ reg = <0x80 0x10>, <0x100 0x10>; #clock-cells = <0>; clocks = <&input_clk>; + + /* + * Set initial core pll output frequency to 90MHz. + * It will be applied at the core pll driver probing + * on early boot. + */ + assigned-clocks = <&core_clk>; + assigned-clock-rates = <90000000>; }; core_intc: archs-intc@cpu { diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi index 63954a8b0100..69ff4895f2ba 100644 --- a/arch/arc/boot/dts/axc003_idu.dtsi +++ b/arch/arc/boot/dts/axc003_idu.dtsi @@ -35,6 +35,14 @@ reg = <0x80 0x10>, <0x100 0x10>; #clock-cells = <0>; clocks = <&input_clk>; + + /* + * Set initial core pll output frequency to 100MHz. + * It will be applied at the core pll driver probing + * on early boot. + */ + assigned-clocks = <&core_clk>; + assigned-clock-rates = <100000000>; }; core_intc: archs-intc@cpu { diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index 8f627c200d60..006aa3de5348 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -114,6 +114,14 @@ reg = <0x00 0x10>, <0x14B8 0x4>; #clock-cells = <0>; clocks = <&input_clk>; + + /* + * Set initial core pll output frequency to 1GHz. + * It will be applied at the core pll driver probing + * on early boot. + */ + assigned-clocks = <&core_clk>; + assigned-clock-rates = <1000000000>; }; serial: serial@5000 { diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index 7b8f8faf8a24..ac6b0ed8341e 100644 --- a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -49,10 +49,11 @@ CONFIG_SERIAL_8250_DW=y CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set +CONFIG_DRM=y +# CONFIG_DRM_FBDEV_EMULATION is not set +CONFIG_DRM_UDL=y CONFIG_FB=y -CONFIG_FB_UDL=y CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_USB=y CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_HCD_PLATFORM=y CONFIG_USB_OHCI_HCD=y diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h index f35974ee7264..c9173c02081c 100644 --- a/arch/arc/include/asm/uaccess.h +++ b/arch/arc/include/asm/uaccess.h @@ -668,6 +668,7 @@ __arc_strncpy_from_user(char *dst, const char __user *src, long count) return 0; __asm__ __volatile__( + " mov lp_count, %5 \n" " lp 3f \n" "1: ldb.ab %3, [%2, 1] \n" " breq.d %3, 0, 3f \n" @@ -684,8 +685,8 @@ __arc_strncpy_from_user(char *dst, const char __user *src, long count) " .word 1b, 4b \n" " .previous \n" : "+r"(res), "+r"(dst), "+r"(src), "=r"(val) - : "g"(-EFAULT), "l"(count) - : "memory"); + : "g"(-EFAULT), "r"(count) + : "lp_count", "lp_start", "lp_end", "memory"); return res; } diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 7ef7d9a8ff89..9d27331fe69a 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -199,7 +199,7 @@ static void read_arc_build_cfg_regs(void) unsigned int exec_ctrl; READ_BCR(AUX_EXEC_CTRL, exec_ctrl); - cpu->extn.dual_enb = exec_ctrl & 1; + cpu->extn.dual_enb = !(exec_ctrl & 1); /* dual issue always present for this core */ cpu->extn.dual = 1; diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index 74315f302971..bf40e06f3fb8 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -163,7 +163,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs, */ static int __print_sym(unsigned int address, void *unused) { - __print_symbol(" %s\n", address); + printk(" %pS\n", (void *)address); return 0; } diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c index bcd7c9fc5d0f..133a4dae41fe 100644 --- a/arch/arc/kernel/traps.c +++ b/arch/arc/kernel/traps.c @@ -83,6 +83,7 @@ DO_ERROR_INFO(SIGILL, "Illegal Insn (or Seq)", insterror_is_error, ILL_ILLOPC) DO_ERROR_INFO(SIGBUS, "Invalid Mem Access", __weak do_memory_error, BUS_ADRERR) DO_ERROR_INFO(SIGTRAP, "Breakpoint Set", trap_is_brkpt, TRAP_BRKPT) DO_ERROR_INFO(SIGBUS, "Misaligned Access", do_misaligned_error, BUS_ADRALN) +DO_ERROR_INFO(SIGSEGV, "gcc generated __builtin_trap", do_trap5_error, 0) /* * Entry Point for Misaligned Data access Exception, for emulating in software @@ -115,6 +116,8 @@ void do_machine_check_fault(unsigned long address, struct pt_regs *regs) * Thus TRAP_S <n> can be used for specific purpose * -1 used for software breakpointing (gdb) * -2 used by kprobes + * -5 __builtin_trap() generated by gcc (2018.03 onwards) for toggle such as + * -fno-isolate-erroneous-paths-dereference */ void do_non_swi_trap(unsigned long address, struct pt_regs *regs) { @@ -134,6 +137,9 @@ void do_non_swi_trap(unsigned long address, struct pt_regs *regs) kgdb_trap(regs); break; + case 5: + do_trap5_error(address, regs); + break; default: break; } @@ -155,3 +161,11 @@ void do_insterror_or_kprobe(unsigned long address, struct pt_regs *regs) insterror_is_error(address, regs); } + +/* + * abort() call generated by older gcc for __builtin_trap() + */ +void abort(void) +{ + __asm__ __volatile__("trap_s 5\n"); +} diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index 7d8c1d6c2f60..6e9a0a9a6a04 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -163,6 +163,9 @@ static void show_ecr_verbose(struct pt_regs *regs) else pr_cont("Bus Error, check PRM\n"); #endif + } else if (vec == ECR_V_TRAP) { + if (regs->ecr_param == 5) + pr_cont("gcc generated __builtin_trap\n"); } else { pr_cont("Check Programmer's Manual\n"); } diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c index f1ac6790da5f..46544e88492d 100644 --- a/arch/arc/plat-axs10x/axs10x.c +++ b/arch/arc/plat-axs10x/axs10x.c @@ -317,25 +317,23 @@ static void __init axs103_early_init(void) * Instead of duplicating defconfig/DT for SMP/QUAD, add a small hack * of fudging the freq in DT */ +#define AXS103_QUAD_CORE_CPU_FREQ_HZ 50000000 + unsigned int num_cores = (read_aux_reg(ARC_REG_MCIP_BCR) >> 16) & 0x3F; if (num_cores > 2) { - u32 freq = 50, orig; - /* - * TODO: use cpu node "cpu-freq" param instead of platform-specific - * "/cpu_card/core_clk" as it works only if we use fixed-clock for cpu. - */ + u32 freq; int off = fdt_path_offset(initial_boot_params, "/cpu_card/core_clk"); const struct fdt_property *prop; prop = fdt_get_property(initial_boot_params, off, - "clock-frequency", NULL); - orig = be32_to_cpu(*(u32*)(prop->data)) / 1000000; + "assigned-clock-rates", NULL); + freq = be32_to_cpu(*(u32 *)(prop->data)); /* Patching .dtb in-place with new core clock value */ - if (freq != orig ) { - freq = cpu_to_be32(freq * 1000000); + if (freq != AXS103_QUAD_CORE_CPU_FREQ_HZ) { + freq = cpu_to_be32(AXS103_QUAD_CORE_CPU_FREQ_HZ); fdt_setprop_inplace(initial_boot_params, off, - "clock-frequency", &freq, sizeof(freq)); + "assigned-clock-rates", &freq, sizeof(freq)); } } #endif diff --git a/arch/arc/plat-hsdk/platform.c b/arch/arc/plat-hsdk/platform.c index fd0ae5e38639..2958aedb649a 100644 --- a/arch/arc/plat-hsdk/platform.c +++ b/arch/arc/plat-hsdk/platform.c @@ -38,42 +38,6 @@ static void __init hsdk_init_per_cpu(unsigned int cpu) #define CREG_PAE (CREG_BASE + 0x180) #define CREG_PAE_UPDATE (CREG_BASE + 0x194) -#define CREG_CORE_IF_CLK_DIV (CREG_BASE + 0x4B8) -#define CREG_CORE_IF_CLK_DIV_2 0x1 -#define CGU_BASE ARC_PERIPHERAL_BASE -#define CGU_PLL_STATUS (ARC_PERIPHERAL_BASE + 0x4) -#define CGU_PLL_CTRL (ARC_PERIPHERAL_BASE + 0x0) -#define CGU_PLL_STATUS_LOCK BIT(0) -#define CGU_PLL_STATUS_ERR BIT(1) -#define CGU_PLL_CTRL_1GHZ 0x3A10 -#define HSDK_PLL_LOCK_TIMEOUT 500 - -#define HSDK_PLL_LOCKED() \ - !!(ioread32((void __iomem *) CGU_PLL_STATUS) & CGU_PLL_STATUS_LOCK) - -#define HSDK_PLL_ERR() \ - !!(ioread32((void __iomem *) CGU_PLL_STATUS) & CGU_PLL_STATUS_ERR) - -static void __init hsdk_set_cpu_freq_1ghz(void) -{ - u32 timeout = HSDK_PLL_LOCK_TIMEOUT; - - /* - * As we set cpu clock which exceeds 500MHz, the divider for the interface - * clock must be programmed to div-by-2. - */ - iowrite32(CREG_CORE_IF_CLK_DIV_2, (void __iomem *) CREG_CORE_IF_CLK_DIV); - - /* Set cpu clock to 1GHz */ - iowrite32(CGU_PLL_CTRL_1GHZ, (void __iomem *) CGU_PLL_CTRL); - - while (!HSDK_PLL_LOCKED() && timeout--) - cpu_relax(); - - if (!HSDK_PLL_LOCKED() || HSDK_PLL_ERR()) - pr_err("Failed to setup CPU frequency to 1GHz!"); -} - #define SDIO_BASE (ARC_PERIPHERAL_BASE + 0xA000) #define SDIO_UHS_REG_EXT (SDIO_BASE + 0x108) #define SDIO_UHS_REG_EXT_DIV_2 (2 << 30) @@ -98,12 +62,6 @@ static void __init hsdk_init_early(void) * minimum possible div-by-2. */ iowrite32(SDIO_UHS_REG_EXT_DIV_2, (void __iomem *) SDIO_UHS_REG_EXT); - - /* - * Setup CPU frequency to 1GHz. - * TODO: remove it after smart hsdk pll driver will be introduced. - */ - hsdk_set_cpu_freq_1ghz(); } static const char *hsdk_compat[] __initconst = { diff --git a/arch/arm/boot/dts/aspeed-g4.dtsi b/arch/arm/boot/dts/aspeed-g4.dtsi index 45d815a86d42..de08d9045cb8 100644 --- a/arch/arm/boot/dts/aspeed-g4.dtsi +++ b/arch/arm/boot/dts/aspeed-g4.dtsi @@ -219,7 +219,7 @@ compatible = "aspeed,ast2400-vuart"; reg = <0x1e787000 0x40>; reg-shift = <2>; - interrupts = <10>; + interrupts = <8>; clocks = <&clk_uart>; no-loopback-test; status = "disabled"; diff --git a/arch/arm/boot/dts/at91-tse850-3.dts b/arch/arm/boot/dts/at91-tse850-3.dts index 5f29010cdbd8..9b82cc8843e1 100644 --- a/arch/arm/boot/dts/at91-tse850-3.dts +++ b/arch/arm/boot/dts/at91-tse850-3.dts @@ -221,6 +221,7 @@ jc42@18 { compatible = "nxp,se97b", "jedec,jc-42.4-temp"; reg = <0x18>; + smbus-timeout-disable; }; dpot: mcp4651-104@28 { diff --git a/arch/arm/boot/dts/da850-lego-ev3.dts b/arch/arm/boot/dts/da850-lego-ev3.dts index 413dbd5d9f64..81942ae83e1f 100644 --- a/arch/arm/boot/dts/da850-lego-ev3.dts +++ b/arch/arm/boot/dts/da850-lego-ev3.dts @@ -178,7 +178,7 @@ */ battery { pinctrl-names = "default"; - pintctrl-0 = <&battery_pins>; + pinctrl-0 = <&battery_pins>; compatible = "lego,ev3-battery"; io-channels = <&adc 4>, <&adc 3>; io-channel-names = "voltage", "current"; @@ -392,7 +392,7 @@ batt_volt_en { gpio-hog; gpios = <6 GPIO_ACTIVE_HIGH>; - output-low; + output-high; }; }; diff --git a/arch/arm/boot/dts/exynos5800-peach-pi.dts b/arch/arm/boot/dts/exynos5800-peach-pi.dts index b2b95ff205e8..0029ec27819c 100644 --- a/arch/arm/boot/dts/exynos5800-peach-pi.dts +++ b/arch/arm/boot/dts/exynos5800-peach-pi.dts @@ -664,6 +664,10 @@ status = "okay"; }; +&mixer { + status = "okay"; +}; + /* eMMC flash */ &mmc_0 { status = "okay"; diff --git a/arch/arm/boot/dts/ls1021a-qds.dts b/arch/arm/boot/dts/ls1021a-qds.dts index 4f211e3c903a..7bb402d3e9d0 100644 --- a/arch/arm/boot/dts/ls1021a-qds.dts +++ b/arch/arm/boot/dts/ls1021a-qds.dts @@ -215,7 +215,7 @@ reg = <0x2a>; VDDA-supply = <®_3p3v>; VDDIO-supply = <®_3p3v>; - clocks = <&sys_mclk 1>; + clocks = <&sys_mclk>; }; }; }; diff --git a/arch/arm/boot/dts/ls1021a-twr.dts b/arch/arm/boot/dts/ls1021a-twr.dts index 7202d9c504be..860b898141f0 100644 --- a/arch/arm/boot/dts/ls1021a-twr.dts +++ b/arch/arm/boot/dts/ls1021a-twr.dts @@ -187,7 +187,7 @@ reg = <0x0a>; VDDA-supply = <®_3p3v>; VDDIO-supply = <®_3p3v>; - clocks = <&sys_mclk 1>; + clocks = <&sys_mclk>; }; }; diff --git a/arch/arm/boot/dts/rk3066a-marsboard.dts b/arch/arm/boot/dts/rk3066a-marsboard.dts index c6d92c25df42..d23ee6d911ac 100644 --- a/arch/arm/boot/dts/rk3066a-marsboard.dts +++ b/arch/arm/boot/dts/rk3066a-marsboard.dts @@ -83,6 +83,10 @@ }; }; +&cpu0 { + cpu0-supply = <&vdd_arm>; +}; + &i2c1 { status = "okay"; clock-frequency = <400000>; diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index cd24894ee5c6..6102e4e7f35c 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -956,7 +956,7 @@ iep_mmu: iommu@ff900800 { compatible = "rockchip,iommu"; reg = <0x0 0xff900800 0x0 0x40>; - interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH 0>; + interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "iep_mmu"; #iommu-cells = <0>; status = "disabled"; diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index b91300d49a31..5840f5c75c3b 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -502,8 +502,8 @@ reg = <0x01c16000 0x1000>; interrupts = <58>; clocks = <&ccu CLK_AHB_HDMI0>, <&ccu CLK_HDMI>, - <&ccu 9>, - <&ccu 18>; + <&ccu CLK_PLL_VIDEO0_2X>, + <&ccu CLK_PLL_VIDEO1_2X>; clock-names = "ahb", "mod", "pll-0", "pll-1"; dmas = <&dma SUN4I_DMA_NORMAL 16>, <&dma SUN4I_DMA_NORMAL 16>, diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi index 6ae4d95e230e..316cb8b2945b 100644 --- a/arch/arm/boot/dts/sun5i-a10s.dtsi +++ b/arch/arm/boot/dts/sun5i-a10s.dtsi @@ -82,8 +82,8 @@ reg = <0x01c16000 0x1000>; interrupts = <58>; clocks = <&ccu CLK_AHB_HDMI>, <&ccu CLK_HDMI>, - <&ccu 9>, - <&ccu 16>; + <&ccu CLK_PLL_VIDEO0_2X>, + <&ccu CLK_PLL_VIDEO1_2X>; clock-names = "ahb", "mod", "pll-0", "pll-1"; dmas = <&dma SUN4I_DMA_NORMAL 16>, <&dma SUN4I_DMA_NORMAL 16>, diff --git a/arch/arm/boot/dts/sun6i-a31.dtsi b/arch/arm/boot/dts/sun6i-a31.dtsi index 8bfa12b548e0..72d3fe44ecaf 100644 --- a/arch/arm/boot/dts/sun6i-a31.dtsi +++ b/arch/arm/boot/dts/sun6i-a31.dtsi @@ -429,8 +429,8 @@ interrupts = <GIC_SPI 88 IRQ_TYPE_LEVEL_HIGH>; clocks = <&ccu CLK_AHB1_HDMI>, <&ccu CLK_HDMI>, <&ccu CLK_HDMI_DDC>, - <&ccu 7>, - <&ccu 13>; + <&ccu CLK_PLL_VIDEO0_2X>, + <&ccu CLK_PLL_VIDEO1_2X>; clock-names = "ahb", "mod", "ddc", "pll-0", "pll-1"; resets = <&ccu RST_AHB1_HDMI>; reset-names = "ahb"; diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index 68dfa82544fc..59655e42e4b0 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -581,8 +581,8 @@ reg = <0x01c16000 0x1000>; interrupts = <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>; clocks = <&ccu CLK_AHB_HDMI0>, <&ccu CLK_HDMI>, - <&ccu 9>, - <&ccu 18>; + <&ccu CLK_PLL_VIDEO0_2X>, + <&ccu CLK_PLL_VIDEO1_2X>; clock-names = "ahb", "mod", "pll-0", "pll-1"; dmas = <&dma SUN4I_DMA_NORMAL 16>, <&dma SUN4I_DMA_NORMAL 16>, diff --git a/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts b/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts index 98715538932f..a021ee6da396 100644 --- a/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts +++ b/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts @@ -146,6 +146,7 @@ status = "okay"; axp81x: pmic@3a3 { + compatible = "x-powers,axp813"; reg = <0x3a3>; interrupt-parent = <&r_intc>; interrupts = <0 IRQ_TYPE_LEVEL_LOW>; diff --git a/arch/arm/boot/dts/tango4-common.dtsi b/arch/arm/boot/dts/tango4-common.dtsi index 0ec1b0a317b4..ff72a8efb73d 100644 --- a/arch/arm/boot/dts/tango4-common.dtsi +++ b/arch/arm/boot/dts/tango4-common.dtsi @@ -156,7 +156,6 @@ reg = <0x6e000 0x400>; ranges = <0 0x6e000 0x400>; interrupt-parent = <&gic>; - interrupt-controller; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 5cf04888c581..3e26c6f7a191 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -793,7 +793,6 @@ void abort(void) /* if that doesn't kill us, halt */ panic("Oops failed to kill thread"); } -EXPORT_SYMBOL(abort); void __init trap_init(void) { diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c index 8be04ec95adf..5ace9380626a 100644 --- a/arch/arm/mach-davinci/dm365.c +++ b/arch/arm/mach-davinci/dm365.c @@ -868,10 +868,10 @@ static const struct dma_slave_map dm365_edma_map[] = { { "spi_davinci.0", "rx", EDMA_FILTER_PARAM(0, 17) }, { "spi_davinci.3", "tx", EDMA_FILTER_PARAM(0, 18) }, { "spi_davinci.3", "rx", EDMA_FILTER_PARAM(0, 19) }, - { "dm6441-mmc.0", "rx", EDMA_FILTER_PARAM(0, 26) }, - { "dm6441-mmc.0", "tx", EDMA_FILTER_PARAM(0, 27) }, - { "dm6441-mmc.1", "rx", EDMA_FILTER_PARAM(0, 30) }, - { "dm6441-mmc.1", "tx", EDMA_FILTER_PARAM(0, 31) }, + { "da830-mmc.0", "rx", EDMA_FILTER_PARAM(0, 26) }, + { "da830-mmc.0", "tx", EDMA_FILTER_PARAM(0, 27) }, + { "da830-mmc.1", "rx", EDMA_FILTER_PARAM(0, 30) }, + { "da830-mmc.1", "tx", EDMA_FILTER_PARAM(0, 31) }, }; static struct edma_soc_info dm365_edma_pdata = { @@ -925,12 +925,14 @@ static struct resource edma_resources[] = { /* not using TC*_ERR */ }; -static struct platform_device dm365_edma_device = { - .name = "edma", - .id = 0, - .dev.platform_data = &dm365_edma_pdata, - .num_resources = ARRAY_SIZE(edma_resources), - .resource = edma_resources, +static const struct platform_device_info dm365_edma_device __initconst = { + .name = "edma", + .id = 0, + .dma_mask = DMA_BIT_MASK(32), + .res = edma_resources, + .num_res = ARRAY_SIZE(edma_resources), + .data = &dm365_edma_pdata, + .size_data = sizeof(dm365_edma_pdata), }; static struct resource dm365_asp_resources[] = { @@ -1428,13 +1430,18 @@ int __init dm365_init_video(struct vpfe_config *vpfe_cfg, static int __init dm365_init_devices(void) { + struct platform_device *edma_pdev; int ret = 0; if (!cpu_is_davinci_dm365()) return 0; davinci_cfg_reg(DM365_INT_EDMA_CC); - platform_device_register(&dm365_edma_device); + edma_pdev = platform_device_register_full(&dm365_edma_device); + if (IS_ERR(edma_pdev)) { + pr_warn("%s: Failed to register eDMA\n", __func__); + return PTR_ERR(edma_pdev); + } platform_device_register(&dm365_mdio_device); platform_device_register(&dm365_emac_device); diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts index 45bdbfb96126..4a8d3f83a36e 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts @@ -75,6 +75,7 @@ pinctrl-0 = <&rgmii_pins>; phy-mode = "rgmii"; phy-handle = <&ext_rgmii_phy>; + phy-supply = <®_dc1sw>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts index 806442d3e846..604cdaedac38 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts @@ -77,6 +77,7 @@ pinctrl-0 = <&rmii_pins>; phy-mode = "rmii"; phy-handle = <&ext_rmii_phy1>; + phy-supply = <®_dc1sw>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts index 0eb2acedf8c3..abe179de35d7 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts @@ -82,6 +82,7 @@ pinctrl-0 = <&rgmii_pins>; phy-mode = "rgmii"; phy-handle = <&ext_rgmii_phy>; + phy-supply = <®_dc1sw>; status = "okay"; }; @@ -95,7 +96,7 @@ &mmc2 { pinctrl-names = "default"; pinctrl-0 = <&mmc2_pins>; - vmmc-supply = <®_vcc3v3>; + vmmc-supply = <®_dcdc1>; vqmmc-supply = <®_vcc1v8>; bus-width = <8>; non-removable; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi index a5da18a6f286..43418bd881d8 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi @@ -45,19 +45,10 @@ #include "sun50i-a64.dtsi" -/ { - reg_vcc3v3: vcc3v3 { - compatible = "regulator-fixed"; - regulator-name = "vcc3v3"; - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; - }; -}; - &mmc0 { pinctrl-names = "default"; pinctrl-0 = <&mmc0_pins>; - vmmc-supply = <®_vcc3v3>; + vmmc-supply = <®_dcdc1>; non-removable; disable-wp; bus-width = <4>; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts index b6b7a561df8c..a42fd79a62a3 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts @@ -71,7 +71,7 @@ pinctrl-0 = <&mmc0_pins_a>, <&mmc0_cd_pin>; vmmc-supply = <®_vcc3v3>; bus-width = <4>; - cd-gpios = <&pio 5 6 GPIO_ACTIVE_HIGH>; + cd-gpios = <&pio 5 6 GPIO_ACTIVE_LOW>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/renesas/salvator-common.dtsi b/arch/arm64/boot/dts/renesas/salvator-common.dtsi index a298df74ca6c..dbe2648649db 100644 --- a/arch/arm64/boot/dts/renesas/salvator-common.dtsi +++ b/arch/arm64/boot/dts/renesas/salvator-common.dtsi @@ -255,7 +255,6 @@ &avb { pinctrl-0 = <&avb_pins>; pinctrl-names = "default"; - renesas,no-ether-link; phy-handle = <&phy0>; status = "okay"; diff --git a/arch/arm64/boot/dts/renesas/ulcb.dtsi b/arch/arm64/boot/dts/renesas/ulcb.dtsi index 0d85b315ce71..73439cf48659 100644 --- a/arch/arm64/boot/dts/renesas/ulcb.dtsi +++ b/arch/arm64/boot/dts/renesas/ulcb.dtsi @@ -145,7 +145,6 @@ &avb { pinctrl-0 = <&avb_pins>; pinctrl-names = "default"; - renesas,no-ether-link; phy-handle = <&phy0>; status = "okay"; diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts index d4f80786e7c2..3890468678ce 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts @@ -132,6 +132,8 @@ assigned-clocks = <&cru SCLK_MAC2IO>, <&cru SCLK_MAC2IO_EXT>; assigned-clock-parents = <&gmac_clkin>, <&gmac_clkin>; clock_in_out = "input"; + /* shows instability at 1GBit right now */ + max-speed = <100>; phy-supply = <&vcc_io>; phy-mode = "rgmii"; pinctrl-names = "default"; diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index 41d61840fb99..2426da631938 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -514,7 +514,7 @@ tsadc: tsadc@ff250000 { compatible = "rockchip,rk3328-tsadc"; reg = <0x0 0xff250000 0x0 0x100>; - interrupts = <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH 0>; + interrupts = <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>; assigned-clocks = <&cru SCLK_TSADC>; assigned-clock-rates = <50000>; clocks = <&cru SCLK_TSADC>, <&cru PCLK_TSADC>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index 910628d18add..1fc5060d7027 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -155,17 +155,6 @@ regulator-min-microvolt = <5000000>; regulator-max-microvolt = <5000000>; }; - - vdd_log: vdd-log { - compatible = "pwm-regulator"; - pwms = <&pwm2 0 25000 0>; - regulator-name = "vdd_log"; - regulator-min-microvolt = <800000>; - regulator-max-microvolt = <1400000>; - regulator-always-on; - regulator-boot-on; - status = "okay"; - }; }; &cpu_b0 { diff --git a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi index 48e733136db4..0ac2ace82435 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi +++ b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi @@ -198,8 +198,8 @@ gpio-controller; #gpio-cells = <2>; gpio-ranges = <&pinctrl 0 0 0>, - <&pinctrl 96 0 0>, - <&pinctrl 160 0 0>; + <&pinctrl 104 0 0>, + <&pinctrl 168 0 0>; gpio-ranges-group-names = "gpio_range0", "gpio_range1", "gpio_range2"; diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c index cb79fba79d43..b88a8dd14933 100644 --- a/arch/m32r/kernel/traps.c +++ b/arch/m32r/kernel/traps.c @@ -122,7 +122,6 @@ void abort(void) /* if that doesn't kill us, halt */ panic("Oops failed to kill thread"); } -EXPORT_SYMBOL(abort); void __init trap_init(void) { diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h index dd5a08aaa4da..3eb4bfc1fb36 100644 --- a/arch/parisc/include/asm/ldcw.h +++ b/arch/parisc/include/asm/ldcw.h @@ -12,6 +12,7 @@ for the semaphore. */ #define __PA_LDCW_ALIGNMENT 16 +#define __PA_LDCW_ALIGN_ORDER 4 #define __ldcw_align(a) ({ \ unsigned long __ret = (unsigned long) &(a)->lock[0]; \ __ret = (__ret + __PA_LDCW_ALIGNMENT - 1) \ @@ -29,6 +30,7 @@ ldcd). */ #define __PA_LDCW_ALIGNMENT 4 +#define __PA_LDCW_ALIGN_ORDER 2 #define __ldcw_align(a) (&(a)->slock) #define __LDCW "ldcw,co" diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index d8f77358e2ba..29b99b8964aa 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -870,7 +870,7 @@ static void print_parisc_device(struct parisc_device *dev) static int count; print_pa_hwpath(dev, hw_path); - printk(KERN_INFO "%d. %s at 0x%p [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }", + printk(KERN_INFO "%d. %s at 0x%px [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }", ++count, dev->name, (void*) dev->hpa.start, hw_path, dev->id.hw_type, dev->id.hversion_rev, dev->id.hversion, dev->id.sversion); diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index f3cecf5117cf..e95207c0565e 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -35,6 +35,7 @@ #include <asm/pgtable.h> #include <asm/signal.h> #include <asm/unistd.h> +#include <asm/ldcw.h> #include <asm/thread_info.h> #include <linux/linkage.h> @@ -46,6 +47,14 @@ #endif .import pa_tlb_lock,data + .macro load_pa_tlb_lock reg +#if __PA_LDCW_ALIGNMENT > 4 + load32 PA(pa_tlb_lock) + __PA_LDCW_ALIGNMENT-1, \reg + depi 0,31,__PA_LDCW_ALIGN_ORDER, \reg +#else + load32 PA(pa_tlb_lock), \reg +#endif + .endm /* space_to_prot macro creates a prot id from a space id */ @@ -457,7 +466,7 @@ .macro tlb_lock spc,ptp,pte,tmp,tmp1,fault #ifdef CONFIG_SMP cmpib,COND(=),n 0,\spc,2f - load32 PA(pa_tlb_lock),\tmp + load_pa_tlb_lock \tmp 1: LDCW 0(\tmp),\tmp1 cmpib,COND(=) 0,\tmp1,1b nop @@ -480,7 +489,7 @@ /* Release pa_tlb_lock lock. */ .macro tlb_unlock1 spc,tmp #ifdef CONFIG_SMP - load32 PA(pa_tlb_lock),\tmp + load_pa_tlb_lock \tmp tlb_unlock0 \spc,\tmp #endif .endm diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S index adf7187f8951..2d40c4ff3f69 100644 --- a/arch/parisc/kernel/pacache.S +++ b/arch/parisc/kernel/pacache.S @@ -36,6 +36,7 @@ #include <asm/assembly.h> #include <asm/pgtable.h> #include <asm/cache.h> +#include <asm/ldcw.h> #include <linux/linkage.h> .text @@ -333,8 +334,12 @@ ENDPROC_CFI(flush_data_cache_local) .macro tlb_lock la,flags,tmp #ifdef CONFIG_SMP - ldil L%pa_tlb_lock,%r1 - ldo R%pa_tlb_lock(%r1),\la +#if __PA_LDCW_ALIGNMENT > 4 + load32 pa_tlb_lock + __PA_LDCW_ALIGNMENT-1, \la + depi 0,31,__PA_LDCW_ALIGN_ORDER, \la +#else + load32 pa_tlb_lock, \la +#endif rsm PSW_SM_I,\flags 1: LDCW 0(\la),\tmp cmpib,<>,n 0,\tmp,3f diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 30f92391a93e..cad3e8661cd6 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -39,6 +39,7 @@ #include <linux/kernel.h> #include <linux/mm.h> #include <linux/fs.h> +#include <linux/cpu.h> #include <linux/module.h> #include <linux/personality.h> #include <linux/ptrace.h> @@ -184,6 +185,44 @@ int dump_task_fpu (struct task_struct *tsk, elf_fpregset_t *r) } /* + * Idle thread support + * + * Detect when running on QEMU with SeaBIOS PDC Firmware and let + * QEMU idle the host too. + */ + +int running_on_qemu __read_mostly; + +void __cpuidle arch_cpu_idle_dead(void) +{ + /* nop on real hardware, qemu will offline CPU. */ + asm volatile("or %%r31,%%r31,%%r31\n":::); +} + +void __cpuidle arch_cpu_idle(void) +{ + local_irq_enable(); + + /* nop on real hardware, qemu will idle sleep. */ + asm volatile("or %%r10,%%r10,%%r10\n":::); +} + +static int __init parisc_idle_init(void) +{ + const char *marker; + + /* check QEMU/SeaBIOS marker in PAGE0 */ + marker = (char *) &PAGE0->pad0; + running_on_qemu = (memcmp(marker, "SeaBIOS", 8) == 0); + + if (!running_on_qemu) + cpu_idle_poll_ctrl(1); + + return 0; +} +arch_initcall(parisc_idle_init); + +/* * Copy architecture-specific thread state */ int diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 13f7854e0d49..48f41399fc0b 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -631,11 +631,11 @@ void __init mem_init(void) mem_init_print_info(NULL); #ifdef CONFIG_DEBUG_KERNEL /* double-sanity-check paranoia */ printk("virtual kernel memory layout:\n" - " vmalloc : 0x%p - 0x%p (%4ld MB)\n" - " memory : 0x%p - 0x%p (%4ld MB)\n" - " .init : 0x%p - 0x%p (%4ld kB)\n" - " .data : 0x%p - 0x%p (%4ld kB)\n" - " .text : 0x%p - 0x%p (%4ld kB)\n", + " vmalloc : 0x%px - 0x%px (%4ld MB)\n" + " memory : 0x%px - 0x%px (%4ld MB)\n" + " .init : 0x%px - 0x%px (%4ld kB)\n" + " .data : 0x%px - 0x%px (%4ld kB)\n" + " .text : 0x%px - 0x%px (%4ld kB)\n", (void*)VMALLOC_START, (void*)VMALLOC_END, (VMALLOC_END - VMALLOC_START) >> 20, diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 4797d08581ce..6e1e39035380 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -145,6 +145,11 @@ static noinline int bad_area(struct pt_regs *regs, unsigned long address) return __bad_area(regs, address, SEGV_MAPERR); } +static noinline int bad_access(struct pt_regs *regs, unsigned long address) +{ + return __bad_area(regs, address, SEGV_ACCERR); +} + static int do_sigbus(struct pt_regs *regs, unsigned long address, unsigned int fault) { @@ -490,7 +495,7 @@ retry: good_area: if (unlikely(access_error(is_write, is_exec, vma))) - return bad_area(regs, address); + return bad_access(regs, address); /* * If for any reason at all we couldn't handle the fault, diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 44cbf4c12ea1..df95102e732c 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -354,6 +354,7 @@ static int fsl_of_msi_remove(struct platform_device *ofdev) } static struct lock_class_key fsl_msi_irq_class; +static struct lock_class_key fsl_msi_irq_request_class; static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev, int offset, int irq_index) @@ -373,7 +374,8 @@ static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev, dev_err(&dev->dev, "No memory for MSI cascade data\n"); return -ENOMEM; } - irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class); + irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class, + &fsl_msi_irq_request_class); cascade_data->index = offset; cascade_data->msi_data = msi; cascade_data->virq = virt_msir; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ec8b68e97d3c..2c93cbbcd15e 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -792,11 +792,12 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm) if (kvm->arch.use_cmma) { /* - * Get the last slot. They should be sorted by base_gfn, so the - * last slot is also the one at the end of the address space. - * We have verified above that at least one slot is present. + * Get the first slot. They are reverse sorted by base_gfn, so + * the first slot is also the one at the end of the address + * space. We have verified above that at least one slot is + * present. */ - ms = slots->memslots + slots->used_slots - 1; + ms = slots->memslots; /* round up so we only use full longs */ ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG); /* allocate enough bytes to store all the bits */ diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 572496c688cc..0714bfa56da0 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -1006,7 +1006,7 @@ static inline int do_essa(struct kvm_vcpu *vcpu, const int orc) cbrlo[entries] = gfn << PAGE_SHIFT; } - if (orc) { + if (orc && gfn < ms->bitmap_size) { /* increment only if we are really flipping the bit to 1 */ if (!test_and_set_bit(gfn, ms->pgste_bitmap)) atomic64_inc(&ms->dirty_pages); diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index cae5a1e16cbd..c4f8039a35e8 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -89,11 +89,11 @@ EXPORT_SYMBOL(enable_sacf_uaccess); void disable_sacf_uaccess(mm_segment_t old_fs) { + current->thread.mm_segment = old_fs; if (old_fs == USER_DS && test_facility(27)) { __ctl_load(S390_lowcore.user_asce, 1, 1); clear_cpu_flag(CIF_ASCE_PRIMARY); } - current->thread.mm_segment = old_fs; } EXPORT_SYMBOL(disable_sacf_uaccess); diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index f7aa5a77827e..2d15d84c20ed 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -181,6 +181,9 @@ out_unlock: static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr, size_t size, int flags) { + unsigned long irqflags; + int ret; + /* * With zdev->tlb_refresh == 0, rpcit is not required to establish new * translations when previously invalid translation-table entries are @@ -196,8 +199,22 @@ static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr, return 0; } - return zpci_refresh_trans((u64) zdev->fh << 32, dma_addr, - PAGE_ALIGN(size)); + ret = zpci_refresh_trans((u64) zdev->fh << 32, dma_addr, + PAGE_ALIGN(size)); + if (ret == -ENOMEM && !s390_iommu_strict) { + /* enable the hypervisor to free some resources */ + if (zpci_refresh_global(zdev)) + goto out; + + spin_lock_irqsave(&zdev->iommu_bitmap_lock, irqflags); + bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap, + zdev->lazy_bitmap, zdev->iommu_pages); + bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages); + spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, irqflags); + ret = 0; + } +out: + return ret; } static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa, diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index 19bcb3b45a70..f069929e8211 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -89,6 +89,9 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range) if (cc) zpci_err_insn(cc, status, addr, range); + if (cc == 1 && (status == 4 || status == 16)) + return -ENOMEM; + return (cc) ? -EIO : 0; } diff --git a/arch/sparc/lib/hweight.S b/arch/sparc/lib/hweight.S index e5547b22cd18..0ddbbb031822 100644 --- a/arch/sparc/lib/hweight.S +++ b/arch/sparc/lib/hweight.S @@ -44,8 +44,8 @@ EXPORT_SYMBOL(__arch_hweight32) .previous ENTRY(__arch_hweight64) - sethi %hi(__sw_hweight16), %g1 - jmpl %g1 + %lo(__sw_hweight16), %g0 + sethi %hi(__sw_hweight64), %g1 + jmpl %g1 + %lo(__sw_hweight64), %g0 nop ENDPROC(__arch_hweight64) EXPORT_SYMBOL(__arch_hweight64) diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c index 5f25b39f04d4..c4ac6043ebb0 100644 --- a/arch/unicore32/kernel/traps.c +++ b/arch/unicore32/kernel/traps.c @@ -298,7 +298,6 @@ void abort(void) /* if that doesn't kill us, halt */ panic("Oops failed to kill thread"); } -EXPORT_SYMBOL(abort); void __init trap_init(void) { diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c index d5364ca2e3f9..b5e5e02f8cde 100644 --- a/arch/x86/boot/compressed/pagetable.c +++ b/arch/x86/boot/compressed/pagetable.c @@ -23,6 +23,9 @@ */ #undef CONFIG_AMD_MEM_ENCRYPT +/* No PAGE_TABLE_ISOLATION support needed either: */ +#undef CONFIG_PAGE_TABLE_ISOLATION + #include "misc.h" /* These actually do the work of building the kernel identity maps. */ diff --git a/arch/x86/boot/genimage.sh b/arch/x86/boot/genimage.sh index c9e8499fbfe7..6a10d52a4145 100644 --- a/arch/x86/boot/genimage.sh +++ b/arch/x86/boot/genimage.sh @@ -80,39 +80,43 @@ genfdimage288() { mcopy $FBZIMAGE w:linux } -genisoimage() { +geniso() { tmp_dir=`dirname $FIMAGE`/isoimage rm -rf $tmp_dir mkdir $tmp_dir - for i in lib lib64 share end ; do + for i in lib lib64 share ; do for j in syslinux ISOLINUX ; do if [ -f /usr/$i/$j/isolinux.bin ] ; then isolinux=/usr/$i/$j/isolinux.bin - cp $isolinux $tmp_dir fi done for j in syslinux syslinux/modules/bios ; do if [ -f /usr/$i/$j/ldlinux.c32 ]; then ldlinux=/usr/$i/$j/ldlinux.c32 - cp $ldlinux $tmp_dir fi done if [ -n "$isolinux" -a -n "$ldlinux" ] ; then break fi - if [ $i = end -a -z "$isolinux" ] ; then - echo 'Need an isolinux.bin file, please install syslinux/isolinux.' - exit 1 - fi done + if [ -z "$isolinux" ] ; then + echo 'Need an isolinux.bin file, please install syslinux/isolinux.' + exit 1 + fi + if [ -z "$ldlinux" ] ; then + echo 'Need an ldlinux.c32 file, please install syslinux/isolinux.' + exit 1 + fi + cp $isolinux $tmp_dir + cp $ldlinux $tmp_dir cp $FBZIMAGE $tmp_dir/linux echo "$KCMDLINE" > $tmp_dir/isolinux.cfg if [ -f "$FDINITRD" ] ; then cp "$FDINITRD" $tmp_dir/initrd.img fi - mkisofs -J -r -input-charset=utf-8 -quiet -o $FIMAGE -b isolinux.bin \ - -c boot.cat -no-emul-boot -boot-load-size 4 -boot-info-table \ - $tmp_dir + genisoimage -J -r -input-charset=utf-8 -quiet -o $FIMAGE \ + -b isolinux.bin -c boot.cat -no-emul-boot -boot-load-size 4 \ + -boot-info-table $tmp_dir isohybrid $FIMAGE 2>/dev/null || true rm -rf $tmp_dir } @@ -121,6 +125,6 @@ case $1 in bzdisk) genbzdisk;; fdimage144) genfdimage144;; fdimage288) genfdimage288;; - isoimage) genisoimage;; + isoimage) geniso;; *) echo 'Unknown image format'; exit 1; esac diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 3fd8bc560fae..45a63e00a6af 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -1,6 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/jump_label.h> #include <asm/unwind_hints.h> +#include <asm/cpufeatures.h> +#include <asm/page_types.h> +#include <asm/percpu.h> +#include <asm/asm-offsets.h> +#include <asm/processor-flags.h> /* @@ -187,6 +192,146 @@ For 32-bit we have the following conventions - kernel is built with #endif .endm +#ifdef CONFIG_PAGE_TABLE_ISOLATION + +/* + * PAGE_TABLE_ISOLATION PGDs are 8k. Flip bit 12 to switch between the two + * halves: + */ +#define PTI_SWITCH_PGTABLES_MASK (1<<PAGE_SHIFT) +#define PTI_SWITCH_MASK (PTI_SWITCH_PGTABLES_MASK|(1<<X86_CR3_PTI_SWITCH_BIT)) + +.macro SET_NOFLUSH_BIT reg:req + bts $X86_CR3_PCID_NOFLUSH_BIT, \reg +.endm + +.macro ADJUST_KERNEL_CR3 reg:req + ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID + /* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */ + andq $(~PTI_SWITCH_MASK), \reg +.endm + +.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI + mov %cr3, \scratch_reg + ADJUST_KERNEL_CR3 \scratch_reg + mov \scratch_reg, %cr3 +.Lend_\@: +.endm + +#define THIS_CPU_user_pcid_flush_mask \ + PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask + +.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI + mov %cr3, \scratch_reg + + ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID + + /* + * Test if the ASID needs a flush. + */ + movq \scratch_reg, \scratch_reg2 + andq $(0x7FF), \scratch_reg /* mask ASID */ + bt \scratch_reg, THIS_CPU_user_pcid_flush_mask + jnc .Lnoflush_\@ + + /* Flush needed, clear the bit */ + btr \scratch_reg, THIS_CPU_user_pcid_flush_mask + movq \scratch_reg2, \scratch_reg + jmp .Lwrcr3_\@ + +.Lnoflush_\@: + movq \scratch_reg2, \scratch_reg + SET_NOFLUSH_BIT \scratch_reg + +.Lwrcr3_\@: + /* Flip the PGD and ASID to the user version */ + orq $(PTI_SWITCH_MASK), \scratch_reg + mov \scratch_reg, %cr3 +.Lend_\@: +.endm + +.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req + pushq %rax + SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax + popq %rax +.endm + +.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req + ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI + movq %cr3, \scratch_reg + movq \scratch_reg, \save_reg + /* + * Is the "switch mask" all zero? That means that both of + * these are zero: + * + * 1. The user/kernel PCID bit, and + * 2. The user/kernel "bit" that points CR3 to the + * bottom half of the 8k PGD + * + * That indicates a kernel CR3 value, not a user CR3. + */ + testq $(PTI_SWITCH_MASK), \scratch_reg + jz .Ldone_\@ + + ADJUST_KERNEL_CR3 \scratch_reg + movq \scratch_reg, %cr3 + +.Ldone_\@: +.endm + +.macro RESTORE_CR3 scratch_reg:req save_reg:req + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI + + ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID + + /* + * KERNEL pages can always resume with NOFLUSH as we do + * explicit flushes. + */ + bt $X86_CR3_PTI_SWITCH_BIT, \save_reg + jnc .Lnoflush_\@ + + /* + * Check if there's a pending flush for the user ASID we're + * about to set. + */ + movq \save_reg, \scratch_reg + andq $(0x7FF), \scratch_reg + bt \scratch_reg, THIS_CPU_user_pcid_flush_mask + jnc .Lnoflush_\@ + + btr \scratch_reg, THIS_CPU_user_pcid_flush_mask + jmp .Lwrcr3_\@ + +.Lnoflush_\@: + SET_NOFLUSH_BIT \save_reg + +.Lwrcr3_\@: + /* + * The CR3 write could be avoided when not changing its value, + * but would require a CR3 read *and* a scratch register. + */ + movq \save_reg, %cr3 +.Lend_\@: +.endm + +#else /* CONFIG_PAGE_TABLE_ISOLATION=n: */ + +.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req +.endm +.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req +.endm +.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req +.endm +.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req +.endm +.macro RESTORE_CR3 scratch_reg:req save_reg:req +.endm + +#endif + #endif /* CONFIG_X86_64 */ /* diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 3d19c830e1b1..f048e384ff54 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -23,7 +23,6 @@ #include <asm/segment.h> #include <asm/cache.h> #include <asm/errno.h> -#include "calling.h" #include <asm/asm-offsets.h> #include <asm/msr.h> #include <asm/unistd.h> @@ -40,6 +39,8 @@ #include <asm/frame.h> #include <linux/err.h> +#include "calling.h" + .code64 .section .entry.text, "ax" @@ -168,6 +169,9 @@ ENTRY(entry_SYSCALL_64_trampoline) /* Stash the user RSP. */ movq %rsp, RSP_SCRATCH + /* Note: using %rsp as a scratch reg. */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp + /* Load the top of the task stack into RSP */ movq CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp @@ -207,6 +211,10 @@ ENTRY(entry_SYSCALL_64) */ swapgs + /* + * This path is not taken when PAGE_TABLE_ISOLATION is disabled so it + * is not required to switch CR3. + */ movq %rsp, PER_CPU_VAR(rsp_scratch) movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp @@ -403,6 +411,7 @@ syscall_return_via_sysret: * We are on the trampoline stack. All regs except RDI are live. * We can do future final exit work right here. */ + SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi popq %rdi popq %rsp @@ -740,6 +749,8 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode) * We can do future final exit work right here. */ + SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi + /* Restore RDI. */ popq %rdi SWAPGS @@ -822,7 +833,9 @@ native_irq_return_ldt: */ pushq %rdi /* Stash user RDI */ - SWAPGS + SWAPGS /* to kernel GS */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */ + movq PER_CPU_VAR(espfix_waddr), %rdi movq %rax, (0*8)(%rdi) /* user RAX */ movq (1*8)(%rsp), %rax /* user RIP */ @@ -838,7 +851,6 @@ native_irq_return_ldt: /* Now RAX == RSP. */ andl $0xffff0000, %eax /* RAX = (RSP & 0xffff0000) */ - popq %rdi /* Restore user RDI */ /* * espfix_stack[31:16] == 0. The page tables are set up such that @@ -849,7 +861,11 @@ native_irq_return_ldt: * still points to an RO alias of the ESPFIX stack. */ orq PER_CPU_VAR(espfix_stack), %rax - SWAPGS + + SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi + SWAPGS /* to user GS */ + popq %rdi /* Restore user RDI */ + movq %rax, %rsp UNWIND_HINT_IRET_REGS offset=8 @@ -949,6 +965,8 @@ ENTRY(switch_to_thread_stack) UNWIND_HINT_FUNC pushq %rdi + /* Need to switch before accessing the thread stack. */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi movq %rsp, %rdi movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI @@ -1250,7 +1268,11 @@ ENTRY(paranoid_entry) js 1f /* negative -> in kernel */ SWAPGS xorl %ebx, %ebx -1: ret + +1: + SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14 + + ret END(paranoid_entry) /* @@ -1272,6 +1294,7 @@ ENTRY(paranoid_exit) testl %ebx, %ebx /* swapgs needed? */ jnz .Lparanoid_exit_no_swapgs TRACE_IRQS_IRETQ + RESTORE_CR3 scratch_reg=%rbx save_reg=%r14 SWAPGS_UNSAFE_STACK jmp .Lparanoid_exit_restore .Lparanoid_exit_no_swapgs: @@ -1299,6 +1322,8 @@ ENTRY(error_entry) * from user mode due to an IRET fault. */ SWAPGS + /* We have user CR3. Change to kernel CR3. */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax .Lerror_entry_from_usermode_after_swapgs: /* Put us onto the real thread stack. */ @@ -1345,6 +1370,7 @@ ENTRY(error_entry) * .Lgs_change's error handler with kernel gsbase. */ SWAPGS + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax jmp .Lerror_entry_done .Lbstep_iret: @@ -1354,10 +1380,11 @@ ENTRY(error_entry) .Lerror_bad_iret: /* - * We came from an IRET to user mode, so we have user gsbase. - * Switch to kernel gsbase: + * We came from an IRET to user mode, so we have user + * gsbase and CR3. Switch to kernel gsbase and CR3: */ SWAPGS + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax /* * Pretend that the exception came from user mode: set up pt_regs @@ -1389,6 +1416,10 @@ END(error_exit) /* * Runs on exception stack. Xen PV does not go through this path at all, * so we can use real assembly here. + * + * Registers: + * %r14: Used to save/restore the CR3 of the interrupted context + * when PAGE_TABLE_ISOLATION is in use. Do not clobber. */ ENTRY(nmi) UNWIND_HINT_IRET_REGS @@ -1452,6 +1483,7 @@ ENTRY(nmi) swapgs cld + SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx movq %rsp, %rdx movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp UNWIND_HINT_IRET_REGS base=%rdx offset=8 @@ -1704,6 +1736,8 @@ end_repeat_nmi: movq $-1, %rsi call do_nmi + RESTORE_CR3 scratch_reg=%r15 save_reg=%r14 + testl %ebx, %ebx /* swapgs needed? */ jnz nmi_restore nmi_swapgs: diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 95ad40eb7eff..98d5358e4041 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -49,6 +49,10 @@ ENTRY(entry_SYSENTER_compat) /* Interrupts are off on entry. */ SWAPGS + + /* We are about to clobber %rsp anyway, clobbering here is OK */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp /* @@ -186,8 +190,13 @@ ENTRY(entry_SYSCALL_compat) /* Interrupts are off on entry. */ swapgs - /* Stash user ESP and switch to the kernel stack. */ + /* Stash user ESP */ movl %esp, %r8d + + /* Use %rsp as scratch reg. User ESP is stashed in r8 */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp + + /* Switch to the kernel stack */ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp /* Construct struct pt_regs on stack */ @@ -256,10 +265,22 @@ sysret32_from_system_call: * when the system call started, which is already known to user * code. We zero R8-R10 to avoid info leaks. */ + movq RSP-ORIG_RAX(%rsp), %rsp + + /* + * The original userspace %rsp (RSP-ORIG_RAX(%rsp)) is stored + * on the process stack which is not mapped to userspace and + * not readable after we SWITCH_TO_USER_CR3. Delay the CR3 + * switch until after after the last reference to the process + * stack. + * + * %r8/%r9 are zeroed before the sysret, thus safe to clobber. + */ + SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9 + xorq %r8, %r8 xorq %r9, %r9 xorq %r10, %r10 - movq RSP-ORIG_RAX(%rsp), %rsp swapgs sysretl END(entry_SYSCALL_compat) diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 1faf40f2dda9..577fa8adb785 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -344,14 +344,14 @@ int in_gate_area_no_mm(unsigned long addr) * vsyscalls but leave the page not present. If so, we skip calling * this. */ -static void __init set_vsyscall_pgtable_user_bits(void) +void __init set_vsyscall_pgtable_user_bits(pgd_t *root) { pgd_t *pgd; p4d_t *p4d; pud_t *pud; pmd_t *pmd; - pgd = pgd_offset_k(VSYSCALL_ADDR); + pgd = pgd_offset_pgd(root, VSYSCALL_ADDR); set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER)); p4d = p4d_offset(pgd, VSYSCALL_ADDR); #if CONFIG_PGTABLE_LEVELS >= 5 @@ -373,7 +373,7 @@ void __init map_vsyscall(void) vsyscall_mode == NATIVE ? PAGE_KERNEL_VSYSCALL : PAGE_KERNEL_VVAR); - set_vsyscall_pgtable_user_bits(); + set_vsyscall_pgtable_user_bits(swapper_pg_dir); } BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) != diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 09c26a4f139c..731153a4681e 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3847,6 +3847,8 @@ static struct attribute *intel_pmu_attrs[] = { __init int intel_pmu_init(void) { + struct attribute **extra_attr = NULL; + struct attribute **to_free = NULL; union cpuid10_edx edx; union cpuid10_eax eax; union cpuid10_ebx ebx; @@ -3854,7 +3856,6 @@ __init int intel_pmu_init(void) unsigned int unused; struct extra_reg *er; int version, i; - struct attribute **extra_attr = NULL; char *name; if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { @@ -4294,6 +4295,7 @@ __init int intel_pmu_init(void) extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? hsw_format_attr : nhm_format_attr; extra_attr = merge_attr(extra_attr, skl_format_attr); + to_free = extra_attr; x86_pmu.cpu_events = get_hsw_events_attrs(); intel_pmu_pebs_data_source_skl( boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X); @@ -4401,6 +4403,7 @@ __init int intel_pmu_init(void) pr_cont("full-width counters, "); } + kfree(to_free); return 0; } diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 3674a4b6f8bd..8156e47da7ba 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -3,16 +3,19 @@ #include <linux/types.h> #include <linux/slab.h> +#include <asm/cpu_entry_area.h> #include <asm/perf_event.h> +#include <asm/tlbflush.h> #include <asm/insn.h> #include "../perf_event.h" +/* Waste a full page so it can be mapped into the cpu_entry_area */ +DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store); + /* The size of a BTS record in bytes: */ #define BTS_RECORD_SIZE 24 -#define BTS_BUFFER_SIZE (PAGE_SIZE << 4) -#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4) #define PEBS_FIXUP_SIZE PAGE_SIZE /* @@ -279,17 +282,67 @@ void fini_debug_store_on_cpu(int cpu) static DEFINE_PER_CPU(void *, insn_buffer); -static int alloc_pebs_buffer(int cpu) +static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot) { - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + unsigned long start = (unsigned long)cea; + phys_addr_t pa; + size_t msz = 0; + + pa = virt_to_phys(addr); + + preempt_disable(); + for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE) + cea_set_pte(cea, pa, prot); + + /* + * This is a cross-CPU update of the cpu_entry_area, we must shoot down + * all TLB entries for it. + */ + flush_tlb_kernel_range(start, start + size); + preempt_enable(); +} + +static void ds_clear_cea(void *cea, size_t size) +{ + unsigned long start = (unsigned long)cea; + size_t msz = 0; + + preempt_disable(); + for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE) + cea_set_pte(cea, 0, PAGE_NONE); + + flush_tlb_kernel_range(start, start + size); + preempt_enable(); +} + +static void *dsalloc_pages(size_t size, gfp_t flags, int cpu) +{ + unsigned int order = get_order(size); int node = cpu_to_node(cpu); - int max; - void *buffer, *ibuffer; + struct page *page; + + page = __alloc_pages_node(node, flags | __GFP_ZERO, order); + return page ? page_address(page) : NULL; +} + +static void dsfree_pages(const void *buffer, size_t size) +{ + if (buffer) + free_pages((unsigned long)buffer, get_order(size)); +} + +static int alloc_pebs_buffer(int cpu) +{ + struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); + struct debug_store *ds = hwev->ds; + size_t bsiz = x86_pmu.pebs_buffer_size; + int max, node = cpu_to_node(cpu); + void *buffer, *ibuffer, *cea; if (!x86_pmu.pebs) return 0; - buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node); + buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu); if (unlikely(!buffer)) return -ENOMEM; @@ -300,25 +353,27 @@ static int alloc_pebs_buffer(int cpu) if (x86_pmu.intel_cap.pebs_format < 2) { ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node); if (!ibuffer) { - kfree(buffer); + dsfree_pages(buffer, bsiz); return -ENOMEM; } per_cpu(insn_buffer, cpu) = ibuffer; } - - max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size; - - ds->pebs_buffer_base = (u64)(unsigned long)buffer; + hwev->ds_pebs_vaddr = buffer; + /* Update the cpu entry area mapping */ + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer; + ds->pebs_buffer_base = (unsigned long) cea; + ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL); ds->pebs_index = ds->pebs_buffer_base; - ds->pebs_absolute_maximum = ds->pebs_buffer_base + - max * x86_pmu.pebs_record_size; - + max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size); + ds->pebs_absolute_maximum = ds->pebs_buffer_base + max; return 0; } static void release_pebs_buffer(int cpu) { - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); + struct debug_store *ds = hwev->ds; + void *cea; if (!ds || !x86_pmu.pebs) return; @@ -326,73 +381,70 @@ static void release_pebs_buffer(int cpu) kfree(per_cpu(insn_buffer, cpu)); per_cpu(insn_buffer, cpu) = NULL; - kfree((void *)(unsigned long)ds->pebs_buffer_base); + /* Clear the fixmap */ + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer; + ds_clear_cea(cea, x86_pmu.pebs_buffer_size); ds->pebs_buffer_base = 0; + dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size); + hwev->ds_pebs_vaddr = NULL; } static int alloc_bts_buffer(int cpu) { - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; - int node = cpu_to_node(cpu); - int max, thresh; - void *buffer; + struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); + struct debug_store *ds = hwev->ds; + void *buffer, *cea; + int max; if (!x86_pmu.bts) return 0; - buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node); + buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu); if (unlikely(!buffer)) { WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__); return -ENOMEM; } - - max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; - thresh = max / 16; - - ds->bts_buffer_base = (u64)(unsigned long)buffer; + hwev->ds_bts_vaddr = buffer; + /* Update the fixmap */ + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer; + ds->bts_buffer_base = (unsigned long) cea; + ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL); ds->bts_index = ds->bts_buffer_base; - ds->bts_absolute_maximum = ds->bts_buffer_base + - max * BTS_RECORD_SIZE; - ds->bts_interrupt_threshold = ds->bts_absolute_maximum - - thresh * BTS_RECORD_SIZE; - + max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE); + ds->bts_absolute_maximum = ds->bts_buffer_base + max; + ds->bts_interrupt_threshold = ds->bts_absolute_maximum - (max / 16); return 0; } static void release_bts_buffer(int cpu) { - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); + struct debug_store *ds = hwev->ds; + void *cea; if (!ds || !x86_pmu.bts) return; - kfree((void *)(unsigned long)ds->bts_buffer_base); + /* Clear the fixmap */ + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer; + ds_clear_cea(cea, BTS_BUFFER_SIZE); ds->bts_buffer_base = 0; + dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE); + hwev->ds_bts_vaddr = NULL; } static int alloc_ds_buffer(int cpu) { - int node = cpu_to_node(cpu); - struct debug_store *ds; - - ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node); - if (unlikely(!ds)) - return -ENOMEM; + struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store; + memset(ds, 0, sizeof(*ds)); per_cpu(cpu_hw_events, cpu).ds = ds; - return 0; } static void release_ds_buffer(int cpu) { - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; - - if (!ds) - return; - per_cpu(cpu_hw_events, cpu).ds = NULL; - kfree(ds); } void release_ds_buffers(void) diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index f7aaadf9331f..8e4ea143ed96 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -14,6 +14,8 @@ #include <linux/perf_event.h> +#include <asm/intel_ds.h> + /* To enable MSR tracing please use the generic trace points. */ /* @@ -77,8 +79,6 @@ struct amd_nb { struct event_constraint event_constraints[X86_PMC_IDX_MAX]; }; -/* The maximal number of PEBS events: */ -#define MAX_PEBS_EVENTS 8 #define PEBS_COUNTER_MASK ((1ULL << MAX_PEBS_EVENTS) - 1) /* @@ -95,23 +95,6 @@ struct amd_nb { PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \ PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER) -/* - * A debug store configuration. - * - * We only support architectures that use 64bit fields. - */ -struct debug_store { - u64 bts_buffer_base; - u64 bts_index; - u64 bts_absolute_maximum; - u64 bts_interrupt_threshold; - u64 pebs_buffer_base; - u64 pebs_index; - u64 pebs_absolute_maximum; - u64 pebs_interrupt_threshold; - u64 pebs_event_reset[MAX_PEBS_EVENTS]; -}; - #define PEBS_REGS \ (PERF_REG_X86_AX | \ PERF_REG_X86_BX | \ @@ -216,6 +199,8 @@ struct cpu_hw_events { * Intel DebugStore bits */ struct debug_store *ds; + void *ds_pebs_vaddr; + void *ds_bts_vaddr; u64 pebs_enabled; int n_pebs; int n_large_pebs; diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index dbfd0854651f..cf5961ca8677 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -140,7 +140,7 @@ static inline int alternatives_text_reserved(void *start, void *end) ".popsection\n" \ ".pushsection .altinstr_replacement, \"ax\"\n" \ ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ - ".popsection" + ".popsection\n" #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ OLDINSTR_2(oldinstr, 1, 2) \ @@ -151,7 +151,7 @@ static inline int alternatives_text_reserved(void *start, void *end) ".pushsection .altinstr_replacement, \"ax\"\n" \ ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ - ".popsection" + ".popsection\n" /* * Alternative instructions for different CPU types or capabilities. diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 219faaec51df..386a6900e206 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -136,6 +136,7 @@ #endif #ifndef __ASSEMBLY__ +#ifndef __BPF__ /* * This output constraint should be used for any inline asm which has a "call" * instruction. Otherwise the asm may be inserted before the frame pointer @@ -145,5 +146,6 @@ register unsigned long current_stack_pointer asm(_ASM_SP); #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) #endif +#endif #endif /* _ASM_X86_ASM_H */ diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index 2fbc69a0916e..4a7884b8dca5 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -5,6 +5,7 @@ #include <linux/percpu-defs.h> #include <asm/processor.h> +#include <asm/intel_ds.h> /* * cpu_entry_area is a percpu region that contains things needed by the CPU @@ -40,6 +41,18 @@ struct cpu_entry_area { */ char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]; #endif +#ifdef CONFIG_CPU_SUP_INTEL + /* + * Per CPU debug store for Intel performance monitoring. Wastes a + * full page at the moment. + */ + struct debug_store cpu_debug_store; + /* + * The actual PEBS/BTS buffers must be mapped to user space + * Reserve enough fixmap PTEs. + */ + struct debug_store_buffers cpu_debug_buffers; +#endif }; #define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area)) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 800104c8a3ed..21ac898df2d8 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -197,11 +197,12 @@ #define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ #define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ #define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ +#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ - +#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ #define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */ @@ -340,5 +341,6 @@ #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ +#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index ec8be07c0cda..13c5ee878a47 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -21,6 +21,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in desc->type = (info->read_exec_only ^ 1) << 1; desc->type |= info->contents << 2; + /* Set the ACCESS bit so it can be mapped RO */ + desc->type |= 1; desc->s = 1; desc->dpl = 0x3; diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 14d6d5007314..b027633e7300 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -50,6 +50,12 @@ # define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31)) #endif +#ifdef CONFIG_PAGE_TABLE_ISOLATION +# define DISABLE_PTI 0 +#else +# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) +#endif + /* * Make sure to add features to the correct mask */ @@ -60,7 +66,7 @@ #define DISABLED_MASK4 (DISABLE_PCID) #define DISABLED_MASK5 0 #define DISABLED_MASK6 0 -#define DISABLED_MASK7 0 +#define DISABLED_MASK7 (DISABLE_PTI) #define DISABLED_MASK8 0 #define DISABLED_MASK9 (DISABLE_MPX) #define DISABLED_MASK10 0 diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h new file mode 100644 index 000000000000..62a9f4966b42 --- /dev/null +++ b/arch/x86/include/asm/intel_ds.h @@ -0,0 +1,36 @@ +#ifndef _ASM_INTEL_DS_H +#define _ASM_INTEL_DS_H + +#include <linux/percpu-defs.h> + +#define BTS_BUFFER_SIZE (PAGE_SIZE << 4) +#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4) + +/* The maximal number of PEBS events: */ +#define MAX_PEBS_EVENTS 8 + +/* + * A debug store configuration. + * + * We only support architectures that use 64bit fields. + */ +struct debug_store { + u64 bts_buffer_base; + u64 bts_index; + u64 bts_absolute_maximum; + u64 bts_interrupt_threshold; + u64 pebs_buffer_base; + u64 pebs_index; + u64 pebs_absolute_maximum; + u64 pebs_interrupt_threshold; + u64 pebs_event_reset[MAX_PEBS_EVENTS]; +} __aligned(PAGE_SIZE); + +DECLARE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store); + +struct debug_store_buffers { + char bts_buffer[BTS_BUFFER_SIZE]; + char pebs_buffer[PEBS_BUFFER_SIZE]; +}; + +#endif diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h index 139feef467f7..c066ffae222b 100644 --- a/arch/x86/include/asm/irqdomain.h +++ b/arch/x86/include/asm/irqdomain.h @@ -44,7 +44,7 @@ extern int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, extern void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs); extern int mp_irqdomain_activate(struct irq_domain *domain, - struct irq_data *irq_data, bool early); + struct irq_data *irq_data, bool reserve); extern void mp_irqdomain_deactivate(struct irq_domain *domain, struct irq_data *irq_data); extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain); diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 5ede7cae1d67..c931b88982a0 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -50,10 +50,33 @@ struct ldt_struct { * call gates. On native, we could merge the ldt_struct and LDT * allocations, but it's not worth trying to optimize. */ - struct desc_struct *entries; - unsigned int nr_entries; + struct desc_struct *entries; + unsigned int nr_entries; + + /* + * If PTI is in use, then the entries array is not mapped while we're + * in user mode. The whole array will be aliased at the addressed + * given by ldt_slot_va(slot). We use two slots so that we can allocate + * and map, and enable a new LDT without invalidating the mapping + * of an older, still-in-use LDT. + * + * slot will be -1 if this LDT doesn't have an alias mapping. + */ + int slot; }; +/* This is a multiple of PAGE_SIZE. */ +#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE) + +static inline void *ldt_slot_va(int slot) +{ +#ifdef CONFIG_X86_64 + return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot); +#else + BUG(); +#endif +} + /* * Used for LDT copy/destruction. */ @@ -64,6 +87,7 @@ static inline void init_new_context_ldt(struct mm_struct *mm) } int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm); void destroy_context_ldt(struct mm_struct *mm); +void ldt_arch_exit_mmap(struct mm_struct *mm); #else /* CONFIG_MODIFY_LDT_SYSCALL */ static inline void init_new_context_ldt(struct mm_struct *mm) { } static inline int ldt_dup_context(struct mm_struct *oldmm, @@ -71,7 +95,8 @@ static inline int ldt_dup_context(struct mm_struct *oldmm, { return 0; } -static inline void destroy_context_ldt(struct mm_struct *mm) {} +static inline void destroy_context_ldt(struct mm_struct *mm) { } +static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { } #endif static inline void load_mm_ldt(struct mm_struct *mm) @@ -96,10 +121,31 @@ static inline void load_mm_ldt(struct mm_struct *mm) * that we can see. */ - if (unlikely(ldt)) - set_ldt(ldt->entries, ldt->nr_entries); - else + if (unlikely(ldt)) { + if (static_cpu_has(X86_FEATURE_PTI)) { + if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) { + /* + * Whoops -- either the new LDT isn't mapped + * (if slot == -1) or is mapped into a bogus + * slot (if slot > 1). + */ + clear_LDT(); + return; + } + + /* + * If page table isolation is enabled, ldt->entries + * will not be mapped in the userspace pagetables. + * Tell the CPU to access the LDT through the alias + * at ldt_slot_va(ldt->slot). + */ + set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries); + } else { + set_ldt(ldt->entries, ldt->nr_entries); + } + } else { clear_LDT(); + } #else clear_LDT(); #endif @@ -194,6 +240,7 @@ static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) static inline void arch_exit_mmap(struct mm_struct *mm) { paravirt_arch_exit_mmap(mm); + ldt_arch_exit_mmap(mm); } #ifdef CONFIG_X86_64 diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index 4b5e1eafada7..aff42e1da6ee 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -30,6 +30,17 @@ static inline void paravirt_release_p4d(unsigned long pfn) {} */ extern gfp_t __userpte_alloc_gfp; +#ifdef CONFIG_PAGE_TABLE_ISOLATION +/* + * Instead of one PGD, we acquire two PGDs. Being order-1, it is + * both 8k in size and 8k-aligned. That lets us just flip bit 12 + * in a pointer to swap between the two 4k halves. + */ +#define PGD_ALLOCATION_ORDER 1 +#else +#define PGD_ALLOCATION_ORDER 0 +#endif + /* * Allocate and free page tables. */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 95e2dfd75521..e42b8943cb1a 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -28,6 +28,7 @@ extern pgd_t early_top_pgt[PTRS_PER_PGD]; int __init __early_make_pgtable(unsigned long address, pmdval_t pmd); void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); +void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user); void ptdump_walk_pgd_level_checkwx(void); #ifdef CONFIG_DEBUG_WX @@ -841,7 +842,12 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) static inline int p4d_bad(p4d_t p4d) { - return (p4d_flags(p4d) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0; + unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER; + + if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) + ignore_flags |= _PAGE_NX; + + return (p4d_flags(p4d) & ~ignore_flags) != 0; } #endif /* CONFIG_PGTABLE_LEVELS > 3 */ @@ -875,7 +881,12 @@ static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) static inline int pgd_bad(pgd_t pgd) { - return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE; + unsigned long ignore_flags = _PAGE_USER; + + if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) + ignore_flags |= _PAGE_NX; + + return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE; } static inline int pgd_none(pgd_t pgd) @@ -904,7 +915,11 @@ static inline int pgd_none(pgd_t pgd) * pgd_offset() returns a (pgd_t *) * pgd_index() is used get the offset into the pgd page's array of pgd_t's; */ -#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address))) +#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address))) +/* + * a shortcut to get a pgd_t in a given mm + */ +#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address)) /* * a shortcut which implies the use of the kernel's pgd, instead * of a process's @@ -1106,7 +1121,14 @@ static inline int pud_write(pud_t pud) */ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) { - memcpy(dst, src, count * sizeof(pgd_t)); + memcpy(dst, src, count * sizeof(pgd_t)); +#ifdef CONFIG_PAGE_TABLE_ISOLATION + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + /* Clone the user space pgd as well */ + memcpy(kernel_to_user_pgdp(dst), kernel_to_user_pgdp(src), + count * sizeof(pgd_t)); +#endif } #define PTE_SHIFT ilog2(PTRS_PER_PTE) diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index e9f05331e732..81462e9a34f6 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -131,9 +131,97 @@ static inline pud_t native_pudp_get_and_clear(pud_t *xp) #endif } +#ifdef CONFIG_PAGE_TABLE_ISOLATION +/* + * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages + * (8k-aligned and 8k in size). The kernel one is at the beginning 4k and + * the user one is in the last 4k. To switch between them, you + * just need to flip the 12th bit in their addresses. + */ +#define PTI_PGTABLE_SWITCH_BIT PAGE_SHIFT + +/* + * This generates better code than the inline assembly in + * __set_bit(). + */ +static inline void *ptr_set_bit(void *ptr, int bit) +{ + unsigned long __ptr = (unsigned long)ptr; + + __ptr |= BIT(bit); + return (void *)__ptr; +} +static inline void *ptr_clear_bit(void *ptr, int bit) +{ + unsigned long __ptr = (unsigned long)ptr; + + __ptr &= ~BIT(bit); + return (void *)__ptr; +} + +static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp) +{ + return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT); +} + +static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp) +{ + return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT); +} + +static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp) +{ + return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT); +} + +static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp) +{ + return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT); +} +#endif /* CONFIG_PAGE_TABLE_ISOLATION */ + +/* + * Page table pages are page-aligned. The lower half of the top + * level is used for userspace and the top half for the kernel. + * + * Returns true for parts of the PGD that map userspace and + * false for the parts that map the kernel. + */ +static inline bool pgdp_maps_userspace(void *__ptr) +{ + unsigned long ptr = (unsigned long)__ptr; + + return (ptr & ~PAGE_MASK) < (PAGE_SIZE / 2); +} + +#ifdef CONFIG_PAGE_TABLE_ISOLATION +pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd); + +/* + * Take a PGD location (pgdp) and a pgd value that needs to be set there. + * Populates the user and returns the resulting PGD that must be set in + * the kernel copy of the page tables. + */ +static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) +{ + if (!static_cpu_has(X86_FEATURE_PTI)) + return pgd; + return __pti_set_user_pgd(pgdp, pgd); +} +#else +static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) +{ + return pgd; +} +#endif + static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) { +#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL) + p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd); +#else *p4dp = p4d; +#endif } static inline void native_p4d_clear(p4d_t *p4d) @@ -147,7 +235,11 @@ static inline void native_p4d_clear(p4d_t *p4d) static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) { +#ifdef CONFIG_PAGE_TABLE_ISOLATION + *pgdp = pti_set_user_pgd(pgdp, pgd); +#else *pgdp = pgd; +#endif } static inline void native_pgd_clear(pgd_t *pgd) diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 3d27831bc58d..6b8f73dcbc2c 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -75,17 +75,27 @@ typedef struct { pteval_t pte; } pte_t; #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE - 1)) -/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ +/* + * See Documentation/x86/x86_64/mm.txt for a description of the memory map. + * + * Be very careful vs. KASLR when changing anything here. The KASLR address + * range must not overlap with anything except the KASAN shadow area, which + * is correct as KASAN disables KASLR. + */ #define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) #ifdef CONFIG_X86_5LEVEL -# define VMALLOC_SIZE_TB _AC(16384, UL) -# define __VMALLOC_BASE _AC(0xff92000000000000, UL) +# define VMALLOC_SIZE_TB _AC(12800, UL) +# define __VMALLOC_BASE _AC(0xffa0000000000000, UL) # define __VMEMMAP_BASE _AC(0xffd4000000000000, UL) +# define LDT_PGD_ENTRY _AC(-112, UL) +# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) #else # define VMALLOC_SIZE_TB _AC(32, UL) # define __VMALLOC_BASE _AC(0xffffc90000000000, UL) # define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) +# define LDT_PGD_ENTRY _AC(-3, UL) +# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) #endif #ifdef CONFIG_RANDOMIZE_MEMORY @@ -100,13 +110,13 @@ typedef struct { pteval_t pte; } pte_t; #define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) /* The module sections ends with the start of the fixmap */ -#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1) +#define MODULES_END _AC(0xffffffffff000000, UL) #define MODULES_LEN (MODULES_END - MODULES_VADDR) #define ESPFIX_PGD_ENTRY _AC(-2, UL) #define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT) -#define CPU_ENTRY_AREA_PGD _AC(-3, UL) +#define CPU_ENTRY_AREA_PGD _AC(-4, UL) #define CPU_ENTRY_AREA_BASE (CPU_ENTRY_AREA_PGD << P4D_SHIFT) #define EFI_VA_START ( -4 * (_AC(1, UL) << 30)) diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h index 43212a43ee69..6a60fea90b9d 100644 --- a/arch/x86/include/asm/processor-flags.h +++ b/arch/x86/include/asm/processor-flags.h @@ -38,6 +38,11 @@ #define CR3_ADDR_MASK __sme_clr(0x7FFFFFFFFFFFF000ull) #define CR3_PCID_MASK 0xFFFull #define CR3_NOFLUSH BIT_ULL(63) + +#ifdef CONFIG_PAGE_TABLE_ISOLATION +# define X86_CR3_PTI_SWITCH_BIT 11 +#endif + #else /* * CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index cad8dab266bc..d3a67fba200a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -852,13 +852,22 @@ static inline void spin_lock_prefetch(const void *x) #else /* - * User space process size. 47bits minus one guard page. The guard - * page is necessary on Intel CPUs: if a SYSCALL instruction is at - * the highest possible canonical userspace address, then that - * syscall will enter the kernel with a non-canonical return - * address, and SYSRET will explode dangerously. We avoid this - * particular problem by preventing anything from being mapped - * at the maximum canonical address. + * User space process size. This is the first address outside the user range. + * There are a few constraints that determine this: + * + * On Intel CPUs, if a SYSCALL instruction is at the highest canonical + * address, then that syscall will enter the kernel with a + * non-canonical return address, and SYSRET will explode dangerously. + * We avoid this particular problem by preventing anything executable + * from being mapped at the maximum canonical address. + * + * On AMD CPUs in the Ryzen family, there's a nasty bug in which the + * CPUs malfunction if they execute code from the highest canonical page. + * They'll speculate right off the end of the canonical space, and + * bad things happen. This is worked around in the same way as the + * Intel problem. + * + * With page table isolation enabled, we map the LDT in ... [stay tuned] */ #define TASK_SIZE_MAX ((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE) diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h new file mode 100644 index 000000000000..0b5ef05b2d2d --- /dev/null +++ b/arch/x86/include/asm/pti.h @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef _ASM_X86_PTI_H +#define _ASM_X86_PTI_H +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_PAGE_TABLE_ISOLATION +extern void pti_init(void); +extern void pti_check_boottime_disable(void); +#else +static inline void pti_check_boottime_disable(void) { } +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_X86_PTI_H */ diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 9b6df68d8fd1..eb5f7999a893 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -16,8 +16,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, struct tss_struct *tss); /* This runs runs on the previous thread's stack. */ -static inline void prepare_switch_to(struct task_struct *prev, - struct task_struct *next) +static inline void prepare_switch_to(struct task_struct *next) { #ifdef CONFIG_VMAP_STACK /* @@ -70,7 +69,7 @@ struct fork_frame { #define switch_to(prev, next, last) \ do { \ - prepare_switch_to(prev, next); \ + prepare_switch_to(next); \ \ ((last) = __switch_to_asm((prev), (next))); \ } while (0) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index e1884cf35257..4a08dd2ab32a 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -10,38 +10,90 @@ #include <asm/special_insns.h> #include <asm/smp.h> #include <asm/invpcid.h> +#include <asm/pti.h> +#include <asm/processor-flags.h> -static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) -{ - /* - * Bump the generation count. This also serves as a full barrier - * that synchronizes with switch_mm(): callers are required to order - * their read of mm_cpumask after their writes to the paging - * structures. - */ - return atomic64_inc_return(&mm->context.tlb_gen); -} +/* + * The x86 feature is called PCID (Process Context IDentifier). It is similar + * to what is traditionally called ASID on the RISC processors. + * + * We don't use the traditional ASID implementation, where each process/mm gets + * its own ASID and flush/restart when we run out of ASID space. + * + * Instead we have a small per-cpu array of ASIDs and cache the last few mm's + * that came by on this CPU, allowing cheaper switch_mm between processes on + * this CPU. + * + * We end up with different spaces for different things. To avoid confusion we + * use different names for each of them: + * + * ASID - [0, TLB_NR_DYN_ASIDS-1] + * the canonical identifier for an mm + * + * kPCID - [1, TLB_NR_DYN_ASIDS] + * the value we write into the PCID part of CR3; corresponds to the + * ASID+1, because PCID 0 is special. + * + * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS] + * for KPTI each mm has two address spaces and thus needs two + * PCID values, but we can still do with a single ASID denomination + * for each mm. Corresponds to kPCID + 2048. + * + */ /* There are 12 bits of space for ASIDS in CR3 */ #define CR3_HW_ASID_BITS 12 + /* * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for * user/kernel switches */ -#define PTI_CONSUMED_ASID_BITS 0 +#ifdef CONFIG_PAGE_TABLE_ISOLATION +# define PTI_CONSUMED_PCID_BITS 1 +#else +# define PTI_CONSUMED_PCID_BITS 0 +#endif + +#define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS) -#define CR3_AVAIL_ASID_BITS (CR3_HW_ASID_BITS - PTI_CONSUMED_ASID_BITS) /* * ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid. -1 below to account - * for them being zero-based. Another -1 is because ASID 0 is reserved for + * for them being zero-based. Another -1 is because PCID 0 is reserved for * use by non-PCID-aware users. */ -#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_ASID_BITS) - 2) +#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2) + +/* + * 6 because 6 should be plenty and struct tlb_state will fit in two cache + * lines. + */ +#define TLB_NR_DYN_ASIDS 6 +/* + * Given @asid, compute kPCID + */ static inline u16 kern_pcid(u16 asid) { VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); + +#ifdef CONFIG_PAGE_TABLE_ISOLATION + /* + * Make sure that the dynamic ASID space does not confict with the + * bit we are using to switch between user and kernel ASIDs. + */ + BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_SWITCH_BIT)); + /* + * The ASID being passed in here should have respected the + * MAX_ASID_AVAILABLE and thus never have the switch bit set. + */ + VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_SWITCH_BIT)); +#endif + /* + * The dynamically-assigned ASIDs that get passed in are small + * (<TLB_NR_DYN_ASIDS). They never have the high switch bit set, + * so do not bother to clear it. + * * If PCID is on, ASID-aware code paths put the ASID+1 into the * PCID bits. This serves two purposes. It prevents a nasty * situation in which PCID-unaware code saves CR3, loads some other @@ -53,6 +105,18 @@ static inline u16 kern_pcid(u16 asid) return asid + 1; } +/* + * Given @asid, compute uPCID + */ +static inline u16 user_pcid(u16 asid) +{ + u16 ret = kern_pcid(asid); +#ifdef CONFIG_PAGE_TABLE_ISOLATION + ret |= 1 << X86_CR3_PTI_SWITCH_BIT; +#endif + return ret; +} + struct pgd_t; static inline unsigned long build_cr3(pgd_t *pgd, u16 asid) { @@ -95,12 +159,6 @@ static inline bool tlb_defer_switch_to_init_mm(void) return !static_cpu_has(X86_FEATURE_PCID); } -/* - * 6 because 6 should be plenty and struct tlb_state will fit in - * two cache lines. - */ -#define TLB_NR_DYN_ASIDS 6 - struct tlb_context { u64 ctx_id; u64 tlb_gen; @@ -135,6 +193,24 @@ struct tlb_state { bool is_lazy; /* + * If set we changed the page tables in such a way that we + * needed an invalidation of all contexts (aka. PCIDs / ASIDs). + * This tells us to go invalidate all the non-loaded ctxs[] + * on the next context switch. + * + * The current ctx was kept up-to-date as it ran and does not + * need to be invalidated. + */ + bool invalidate_other; + + /* + * Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate + * the corresponding user PCID needs a flush next time we + * switch to it; see SWITCH_TO_USER_CR3. + */ + unsigned short user_pcid_flush_mask; + + /* * Access to this CR4 shadow and to H/W CR4 is protected by * disabling interrupts when modifying either one. */ @@ -215,6 +291,14 @@ static inline unsigned long cr4_read_shadow(void) } /* + * Mark all other ASIDs as invalid, preserves the current. + */ +static inline void invalidate_other_asid(void) +{ + this_cpu_write(cpu_tlbstate.invalidate_other, true); +} + +/* * Save some of cr4 feature set we're using (e.g. Pentium 4MB * enable and PPro Global page enable), so that any CPU's that boot * up after us can get the correct flags. This should only be used @@ -234,18 +318,47 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask) extern void initialize_tlbstate_and_flush(void); /* + * Given an ASID, flush the corresponding user ASID. We can delay this + * until the next time we switch to it. + * + * See SWITCH_TO_USER_CR3. + */ +static inline void invalidate_user_asid(u16 asid) +{ + /* There is no user ASID if address space separation is off */ + if (!IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) + return; + + /* + * We only have a single ASID if PCID is off and the CR3 + * write will have flushed it. + */ + if (!cpu_feature_enabled(X86_FEATURE_PCID)) + return; + + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + + __set_bit(kern_pcid(asid), + (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask)); +} + +/* * flush the entire current user mapping */ static inline void __native_flush_tlb(void) { /* - * If current->mm == NULL then we borrow a mm which may change during a - * task switch and therefore we must not be preempted while we write CR3 - * back: + * Preemption or interrupts must be disabled to protect the access + * to the per CPU variable and to prevent being preempted between + * read_cr3() and write_cr3(). */ - preempt_disable(); + WARN_ON_ONCE(preemptible()); + + invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid)); + + /* If current->mm == NULL then the read_cr3() "borrows" an mm */ native_write_cr3(__native_read_cr3()); - preempt_enable(); } /* @@ -259,6 +372,8 @@ static inline void __native_flush_tlb_global(void) /* * Using INVPCID is considerably faster than a pair of writes * to CR4 sandwiched inside an IRQ flag save/restore. + * + * Note, this works with CR4.PCIDE=0 or 1. */ invpcid_flush_all(); return; @@ -285,7 +400,21 @@ static inline void __native_flush_tlb_global(void) */ static inline void __native_flush_tlb_single(unsigned long addr) { + u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); + asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); + + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + + /* + * Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1. + * Just use invalidate_user_asid() in case we are called early. + */ + if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) + invalidate_user_asid(loaded_mm_asid); + else + invpcid_flush_one(user_pcid(loaded_mm_asid), addr); } /* @@ -301,14 +430,6 @@ static inline void __flush_tlb_all(void) */ __flush_tlb(); } - - /* - * Note: if we somehow had PCID but not PGE, then this wouldn't work -- - * we'd end up flushing kernel translations for the current ASID but - * we might fail to flush kernel translations for other cached ASIDs. - * - * To avoid this issue, we force PCID off if PGE is off. - */ } /* @@ -318,6 +439,16 @@ static inline void __flush_tlb_one(unsigned long addr) { count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); __flush_tlb_single(addr); + + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + + /* + * __flush_tlb_single() will have cleared the TLB entry for this ASID, + * but since kernel space is replicated across all, we must also + * invalidate all others. + */ + invalidate_other_asid(); } #define TLB_FLUSH_ALL -1UL @@ -378,6 +509,17 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a) void native_flush_tlb_others(const struct cpumask *cpumask, const struct flush_tlb_info *info); +static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) +{ + /* + * Bump the generation count. This also serves as a full barrier + * that synchronizes with switch_mm(): callers are required to order + * their read of mm_cpumask after their writes to the paging + * structures. + */ + return atomic64_inc_return(&mm->context.tlb_gen); +} + static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch, struct mm_struct *mm) { diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h index 84b9ec0c1bc0..22647a642e98 100644 --- a/arch/x86/include/asm/trace/irq_vectors.h +++ b/arch/x86/include/asm/trace/irq_vectors.h @@ -283,34 +283,34 @@ TRACE_EVENT(vector_alloc_managed, DECLARE_EVENT_CLASS(vector_activate, TP_PROTO(unsigned int irq, bool is_managed, bool can_reserve, - bool early), + bool reserve), - TP_ARGS(irq, is_managed, can_reserve, early), + TP_ARGS(irq, is_managed, can_reserve, reserve), TP_STRUCT__entry( __field( unsigned int, irq ) __field( bool, is_managed ) __field( bool, can_reserve ) - __field( bool, early ) + __field( bool, reserve ) ), TP_fast_assign( __entry->irq = irq; __entry->is_managed = is_managed; __entry->can_reserve = can_reserve; - __entry->early = early; + __entry->reserve = reserve; ), - TP_printk("irq=%u is_managed=%d can_reserve=%d early=%d", + TP_printk("irq=%u is_managed=%d can_reserve=%d reserve=%d", __entry->irq, __entry->is_managed, __entry->can_reserve, - __entry->early) + __entry->reserve) ); #define DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(name) \ DEFINE_EVENT_FN(vector_activate, name, \ TP_PROTO(unsigned int irq, bool is_managed, \ - bool can_reserve, bool early), \ - TP_ARGS(irq, is_managed, can_reserve, early), NULL, NULL); \ + bool can_reserve, bool reserve), \ + TP_ARGS(irq, is_managed, can_reserve, reserve), NULL, NULL); \ DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_activate); DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_deactivate); diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h index c1688c2d0a12..1f86e1b0a5cd 100644 --- a/arch/x86/include/asm/unwind.h +++ b/arch/x86/include/asm/unwind.h @@ -56,18 +56,27 @@ void unwind_start(struct unwind_state *state, struct task_struct *task, #if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER) /* - * WARNING: The entire pt_regs may not be safe to dereference. In some cases, - * only the iret frame registers are accessible. Use with caution! + * If 'partial' returns true, only the iret frame registers are valid. */ -static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) +static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state, + bool *partial) { if (unwind_done(state)) return NULL; + if (partial) { +#ifdef CONFIG_UNWINDER_ORC + *partial = !state->full_regs; +#else + *partial = false; +#endif + } + return state->regs; } #else -static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) +static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state, + bool *partial) { return NULL; } diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index d9a7c659009c..b986b2ca688a 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h @@ -7,6 +7,7 @@ #ifdef CONFIG_X86_VSYSCALL_EMULATION extern void map_vsyscall(void); +extern void set_vsyscall_pgtable_user_bits(pgd_t *root); /* * Called on instruction fetch fault in vsyscall page. diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h index 7e1e730396ae..bcba3c643e63 100644 --- a/arch/x86/include/uapi/asm/processor-flags.h +++ b/arch/x86/include/uapi/asm/processor-flags.h @@ -78,7 +78,12 @@ #define X86_CR3_PWT _BITUL(X86_CR3_PWT_BIT) #define X86_CR3_PCD_BIT 4 /* Page Cache Disable */ #define X86_CR3_PCD _BITUL(X86_CR3_PCD_BIT) -#define X86_CR3_PCID_MASK _AC(0x00000fff,UL) /* PCID Mask */ + +#define X86_CR3_PCID_BITS 12 +#define X86_CR3_PCID_MASK (_AC((1UL << X86_CR3_PCID_BITS) - 1, UL)) + +#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */ +#define X86_CR3_PCID_NOFLUSH _BITULL(X86_CR3_PCID_NOFLUSH_BIT) /* * Intel CPU features in CR4 diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 6e272f3ea984..880441f24146 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2626,11 +2626,13 @@ static int __init apic_set_verbosity(char *arg) apic_verbosity = APIC_DEBUG; else if (strcmp("verbose", arg) == 0) apic_verbosity = APIC_VERBOSE; +#ifdef CONFIG_X86_64 else { pr_warning("APIC Verbosity level %s not recognised" " use apic=verbose or apic=debug\n", arg); return -EINVAL; } +#endif return 0; } diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index aa85690e9b64..25a87028cb3f 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -151,7 +151,7 @@ static struct apic apic_flat __ro_after_init = { .apic_id_valid = default_apic_id_valid, .apic_id_registered = flat_apic_id_registered, - .irq_delivery_mode = dest_LowestPrio, + .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 1, /* logical */ .disable_esr = 0, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 7b659c4480c9..5078b5ce63a7 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -110,7 +110,7 @@ struct apic apic_noop __ro_after_init = { .apic_id_valid = default_apic_id_valid, .apic_id_registered = noop_apic_id_registered, - .irq_delivery_mode = dest_LowestPrio, + .irq_delivery_mode = dest_Fixed, /* logical delivery broadcast to all CPUs: */ .irq_dest_mode = 1, diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 201579dc5242..8a7963421460 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2988,7 +2988,7 @@ void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq, } int mp_irqdomain_activate(struct irq_domain *domain, - struct irq_data *irq_data, bool early) + struct irq_data *irq_data, bool reserve) { unsigned long flags; diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 9b18be764422..ce503c99f5c4 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -39,17 +39,13 @@ static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) ((apic->irq_dest_mode == 0) ? MSI_ADDR_DEST_MODE_PHYSICAL : MSI_ADDR_DEST_MODE_LOGICAL) | - ((apic->irq_delivery_mode != dest_LowestPrio) ? - MSI_ADDR_REDIRECTION_CPU : - MSI_ADDR_REDIRECTION_LOWPRI) | + MSI_ADDR_REDIRECTION_CPU | MSI_ADDR_DEST_ID(cfg->dest_apicid); msg->data = MSI_DATA_TRIGGER_EDGE | MSI_DATA_LEVEL_ASSERT | - ((apic->irq_delivery_mode != dest_LowestPrio) ? - MSI_DATA_DELIVERY_FIXED : - MSI_DATA_DELIVERY_LOWPRI) | + MSI_DATA_DELIVERY_FIXED | MSI_DATA_VECTOR(cfg->vector); } diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index fa22017de806..02e8acb134f8 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -105,7 +105,7 @@ static struct apic apic_default __ro_after_init = { .apic_id_valid = default_apic_id_valid, .apic_id_registered = default_apic_id_registered, - .irq_delivery_mode = dest_LowestPrio, + .irq_delivery_mode = dest_Fixed, /* logical delivery broadcast to all CPUs: */ .irq_dest_mode = 1, diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 750449152b04..f8b03bb8e725 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -184,6 +184,7 @@ static void reserve_irq_vector_locked(struct irq_data *irqd) irq_matrix_reserve(vector_matrix); apicd->can_reserve = true; apicd->has_reserved = true; + irqd_set_can_reserve(irqd); trace_vector_reserve(irqd->irq, 0); vector_assign_managed_shutdown(irqd); } @@ -368,8 +369,18 @@ static int activate_reserved(struct irq_data *irqd) int ret; ret = assign_irq_vector_any_locked(irqd); - if (!ret) + if (!ret) { apicd->has_reserved = false; + /* + * Core might have disabled reservation mode after + * allocating the irq descriptor. Ideally this should + * happen before allocation time, but that would require + * completely convoluted ways of transporting that + * information. + */ + if (!irqd_can_reserve(irqd)) + apicd->can_reserve = false; + } return ret; } @@ -398,21 +409,21 @@ static int activate_managed(struct irq_data *irqd) } static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd, - bool early) + bool reserve) { struct apic_chip_data *apicd = apic_chip_data(irqd); unsigned long flags; int ret = 0; trace_vector_activate(irqd->irq, apicd->is_managed, - apicd->can_reserve, early); + apicd->can_reserve, reserve); /* Nothing to do for fixed assigned vectors */ if (!apicd->can_reserve && !apicd->is_managed) return 0; raw_spin_lock_irqsave(&vector_lock, flags); - if (early || irqd_is_managed_and_shutdown(irqd)) + if (reserve || irqd_is_managed_and_shutdown(irqd)) vector_assign_managed_shutdown(irqd); else if (apicd->is_managed) ret = activate_managed(irqd); @@ -478,6 +489,7 @@ static bool vector_configure_legacy(unsigned int virq, struct irq_data *irqd, } else { /* Release the vector */ apicd->can_reserve = true; + irqd_set_can_reserve(irqd); clear_irq_vector(irqd); realloc = true; } diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 622f13ca8a94..8b04234e010b 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -184,7 +184,7 @@ static struct apic apic_x2apic_cluster __ro_after_init = { .apic_id_valid = x2apic_apic_id_valid, .apic_id_registered = x2apic_apic_id_registered, - .irq_delivery_mode = dest_LowestPrio, + .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 1, /* logical */ .disable_esr = 0, diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 676b7cf4b62b..76417a9aab73 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -17,6 +17,7 @@ #include <asm/sigframe.h> #include <asm/bootparam.h> #include <asm/suspend.h> +#include <asm/tlbflush.h> #ifdef CONFIG_XEN #include <xen/interface/xen.h> @@ -94,6 +95,9 @@ void common(void) { BLANK(); DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); + /* TLB state for the entry code */ + OFFSET(TLB_STATE_user_pcid_flush_mask, tlb_state, user_pcid_flush_mask); + /* Layout info for cpu_entry_area */ OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss); OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c9757f07d738..39d7ea865207 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -922,6 +922,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) } setup_force_cpu_cap(X86_FEATURE_ALWAYS); + + if (c->x86_vendor != X86_VENDOR_AMD) + setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); + fpu__init_system(c); #ifdef CONFIG_X86_32 @@ -1360,7 +1364,10 @@ void syscall_init(void) (entry_SYSCALL_64_trampoline - _entry_trampoline); wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); - wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline); + if (static_cpu_has(X86_FEATURE_PTI)) + wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline); + else + wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); #ifdef CONFIG_IA32_EMULATION wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat); diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 36b17e0febe8..afbecff161d1 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -76,12 +76,23 @@ void show_iret_regs(struct pt_regs *regs) regs->sp, regs->flags); } -static void show_regs_safe(struct stack_info *info, struct pt_regs *regs) +static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs, + bool partial) { - if (on_stack(info, regs, sizeof(*regs))) + /* + * These on_stack() checks aren't strictly necessary: the unwind code + * has already validated the 'regs' pointer. The checks are done for + * ordering reasons: if the registers are on the next stack, we don't + * want to print them out yet. Otherwise they'll be shown as part of + * the wrong stack. Later, when show_trace_log_lvl() switches to the + * next stack, this function will be called again with the same regs so + * they can be printed in the right context. + */ + if (!partial && on_stack(info, regs, sizeof(*regs))) { __show_regs(regs, 0); - else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET, - IRET_FRAME_SIZE)) { + + } else if (partial && on_stack(info, (void *)regs + IRET_FRAME_OFFSET, + IRET_FRAME_SIZE)) { /* * When an interrupt or exception occurs in entry code, the * full pt_regs might not have been saved yet. In that case @@ -98,11 +109,13 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, struct stack_info stack_info = {0}; unsigned long visit_mask = 0; int graph_idx = 0; + bool partial; printk("%sCall Trace:\n", log_lvl); unwind_start(&state, task, regs, stack); stack = stack ? : get_stack_pointer(task, regs); + regs = unwind_get_entry_regs(&state, &partial); /* * Iterate through the stacks, starting with the current stack pointer. @@ -120,7 +133,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, * - hardirq stack * - entry stack */ - for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { + for ( ; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { const char *stack_name; if (get_stack_info(stack, task, &stack_info, &visit_mask)) { @@ -140,7 +153,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, printk("%s <%s>\n", log_lvl, stack_name); if (regs) - show_regs_safe(&stack_info, regs); + show_regs_if_on_stack(&stack_info, regs, partial); /* * Scan the stack, printing any text addresses we find. At the @@ -164,7 +177,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, /* * Don't print regs->ip again if it was already printed - * by show_regs_safe() below. + * by show_regs_if_on_stack(). */ if (regs && stack == ®s->ip) goto next; @@ -199,9 +212,9 @@ next: unwind_next_frame(&state); /* if the frame has entry regs, print them */ - regs = unwind_get_entry_regs(&state); + regs = unwind_get_entry_regs(&state, &partial); if (regs) - show_regs_safe(&stack_info, regs); + show_regs_if_on_stack(&stack_info, regs, partial); } if (stack_name) @@ -297,11 +310,13 @@ int __die(const char *str, struct pt_regs *regs, long err) unsigned long sp; #endif printk(KERN_DEFAULT - "%s: %04lx [#%d]%s%s%s%s\n", str, err & 0xffff, ++die_counter, + "%s: %04lx [#%d]%s%s%s%s%s\n", str, err & 0xffff, ++die_counter, IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "", IS_ENABLED(CONFIG_SMP) ? " SMP" : "", debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "", - IS_ENABLED(CONFIG_KASAN) ? " KASAN" : ""); + IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "", + IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ? + (boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : ""); if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 7dca675fe78d..04a625f0fcda 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -341,6 +341,27 @@ GLOBAL(early_recursion_flag) .balign PAGE_SIZE; \ GLOBAL(name) +#ifdef CONFIG_PAGE_TABLE_ISOLATION +/* + * Each PGD needs to be 8k long and 8k aligned. We do not + * ever go out to userspace with these, so we do not + * strictly *need* the second page, but this allows us to + * have a single set_pgd() implementation that does not + * need to worry about whether it has 4k or 8k to work + * with. + * + * This ensures PGDs are 8k long: + */ +#define PTI_USER_PGD_FILL 512 +/* This ensures they are 8k-aligned: */ +#define NEXT_PGD_PAGE(name) \ + .balign 2 * PAGE_SIZE; \ +GLOBAL(name) +#else +#define NEXT_PGD_PAGE(name) NEXT_PAGE(name) +#define PTI_USER_PGD_FILL 0 +#endif + /* Automate the creation of 1 to 1 mapping pmd entries */ #define PMDS(START, PERM, COUNT) \ i = 0 ; \ @@ -350,13 +371,14 @@ GLOBAL(name) .endr __INITDATA -NEXT_PAGE(early_top_pgt) +NEXT_PGD_PAGE(early_top_pgt) .fill 511,8,0 #ifdef CONFIG_X86_5LEVEL .quad level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC #else .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC #endif + .fill PTI_USER_PGD_FILL,8,0 NEXT_PAGE(early_dynamic_pgts) .fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0 @@ -364,13 +386,14 @@ NEXT_PAGE(early_dynamic_pgts) .data #if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH) -NEXT_PAGE(init_top_pgt) +NEXT_PGD_PAGE(init_top_pgt) .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC .org init_top_pgt + PGD_PAGE_OFFSET*8, 0 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC .org init_top_pgt + PGD_START_KERNEL*8, 0 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC + .fill PTI_USER_PGD_FILL,8,0 NEXT_PAGE(level3_ident_pgt) .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC @@ -381,8 +404,9 @@ NEXT_PAGE(level2_ident_pgt) */ PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) #else -NEXT_PAGE(init_top_pgt) +NEXT_PGD_PAGE(init_top_pgt) .fill 512,8,0 + .fill PTI_USER_PGD_FILL,8,0 #endif #ifdef CONFIG_X86_5LEVEL diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index a6b5d62f45a7..26d713ecad34 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -24,6 +24,7 @@ #include <linux/uaccess.h> #include <asm/ldt.h> +#include <asm/tlb.h> #include <asm/desc.h> #include <asm/mmu_context.h> #include <asm/syscalls.h> @@ -51,13 +52,11 @@ static void refresh_ldt_segments(void) static void flush_ldt(void *__mm) { struct mm_struct *mm = __mm; - mm_context_t *pc; if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm) return; - pc = &mm->context; - set_ldt(pc->ldt->entries, pc->ldt->nr_entries); + load_mm_ldt(mm); refresh_ldt_segments(); } @@ -94,10 +93,126 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries) return NULL; } + /* The new LDT isn't aliased for PTI yet. */ + new_ldt->slot = -1; + new_ldt->nr_entries = num_entries; return new_ldt; } +/* + * If PTI is enabled, this maps the LDT into the kernelmode and + * usermode tables for the given mm. + * + * There is no corresponding unmap function. Even if the LDT is freed, we + * leave the PTEs around until the slot is reused or the mm is destroyed. + * This is harmless: the LDT is always in ordinary memory, and no one will + * access the freed slot. + * + * If we wanted to unmap freed LDTs, we'd also need to do a flush to make + * it useful, and the flush would slow down modify_ldt(). + */ +static int +map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) +{ +#ifdef CONFIG_PAGE_TABLE_ISOLATION + bool is_vmalloc, had_top_level_entry; + unsigned long va; + spinlock_t *ptl; + pgd_t *pgd; + int i; + + if (!static_cpu_has(X86_FEATURE_PTI)) + return 0; + + /* + * Any given ldt_struct should have map_ldt_struct() called at most + * once. + */ + WARN_ON(ldt->slot != -1); + + /* + * Did we already have the top level entry allocated? We can't + * use pgd_none() for this because it doens't do anything on + * 4-level page table kernels. + */ + pgd = pgd_offset(mm, LDT_BASE_ADDR); + had_top_level_entry = (pgd->pgd != 0); + + is_vmalloc = is_vmalloc_addr(ldt->entries); + + for (i = 0; i * PAGE_SIZE < ldt->nr_entries * LDT_ENTRY_SIZE; i++) { + unsigned long offset = i << PAGE_SHIFT; + const void *src = (char *)ldt->entries + offset; + unsigned long pfn; + pte_t pte, *ptep; + + va = (unsigned long)ldt_slot_va(slot) + offset; + pfn = is_vmalloc ? vmalloc_to_pfn(src) : + page_to_pfn(virt_to_page(src)); + /* + * Treat the PTI LDT range as a *userspace* range. + * get_locked_pte() will allocate all needed pagetables + * and account for them in this mm. + */ + ptep = get_locked_pte(mm, va, &ptl); + if (!ptep) + return -ENOMEM; + /* + * Map it RO so the easy to find address is not a primary + * target via some kernel interface which misses a + * permission check. + */ + pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL)); + set_pte_at(mm, va, ptep, pte); + pte_unmap_unlock(ptep, ptl); + } + + if (mm->context.ldt) { + /* + * We already had an LDT. The top-level entry should already + * have been allocated and synchronized with the usermode + * tables. + */ + WARN_ON(!had_top_level_entry); + if (static_cpu_has(X86_FEATURE_PTI)) + WARN_ON(!kernel_to_user_pgdp(pgd)->pgd); + } else { + /* + * This is the first time we're mapping an LDT for this process. + * Sync the pgd to the usermode tables. + */ + WARN_ON(had_top_level_entry); + if (static_cpu_has(X86_FEATURE_PTI)) { + WARN_ON(kernel_to_user_pgdp(pgd)->pgd); + set_pgd(kernel_to_user_pgdp(pgd), *pgd); + } + } + + va = (unsigned long)ldt_slot_va(slot); + flush_tlb_mm_range(mm, va, va + LDT_SLOT_STRIDE, 0); + + ldt->slot = slot; +#endif + return 0; +} + +static void free_ldt_pgtables(struct mm_struct *mm) +{ +#ifdef CONFIG_PAGE_TABLE_ISOLATION + struct mmu_gather tlb; + unsigned long start = LDT_BASE_ADDR; + unsigned long end = start + (1UL << PGDIR_SHIFT); + + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + + tlb_gather_mmu(&tlb, mm, start, end); + free_pgd_range(&tlb, start, end, start, end); + tlb_finish_mmu(&tlb, start, end); +#endif +} + /* After calling this, the LDT is immutable. */ static void finalize_ldt_struct(struct ldt_struct *ldt) { @@ -156,6 +271,12 @@ int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm) new_ldt->nr_entries * LDT_ENTRY_SIZE); finalize_ldt_struct(new_ldt); + retval = map_ldt_struct(mm, new_ldt, 0); + if (retval) { + free_ldt_pgtables(mm); + free_ldt_struct(new_ldt); + goto out_unlock; + } mm->context.ldt = new_ldt; out_unlock: @@ -174,6 +295,11 @@ void destroy_context_ldt(struct mm_struct *mm) mm->context.ldt = NULL; } +void ldt_arch_exit_mmap(struct mm_struct *mm) +{ + free_ldt_pgtables(mm); +} + static int read_ldt(void __user *ptr, unsigned long bytecount) { struct mm_struct *mm = current->mm; @@ -287,6 +413,25 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) new_ldt->entries[ldt_info.entry_number] = ldt; finalize_ldt_struct(new_ldt); + /* + * If we are using PTI, map the new LDT into the userspace pagetables. + * If there is already an LDT, use the other slot so that other CPUs + * will continue to use the old LDT until install_ldt() switches + * them over to the new LDT. + */ + error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0); + if (error) { + /* + * This only can fail for the first LDT setup. If an LDT is + * already installed then the PTE page is already + * populated. Mop up a half populated page table. + */ + if (!WARN_ON_ONCE(old_ldt)) + free_ldt_pgtables(mm); + free_ldt_struct(new_ldt); + goto out_unlock; + } + install_ldt(mm, new_ldt); free_ldt_struct(old_ldt); error = 0; diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 00bc751c861c..edfede768688 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -48,8 +48,6 @@ static void load_segments(void) "\tmovl $"STR(__KERNEL_DS)",%%eax\n" "\tmovl %%eax,%%ds\n" "\tmovl %%eax,%%es\n" - "\tmovl %%eax,%%fs\n" - "\tmovl %%eax,%%gs\n" "\tmovl %%eax,%%ss\n" : : : "eax", "memory"); #undef STR @@ -232,8 +230,8 @@ void machine_kexec(struct kimage *image) * The gdt & idt are now invalid. * If you want to load them you must set up your own idt & gdt. */ - set_gdt(phys_to_virt(0), 0); idt_invalidate(phys_to_virt(0)); + set_gdt(phys_to_virt(0), 0); /* now call it */ image->start = relocate_kernel_ptr((unsigned long)image->head, diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index aed9d94bd46f..832a6acd730f 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -47,7 +47,7 @@ * section. Since TSS's are completely CPU-local, we want them * on exact cacheline boundaries, to eliminate cacheline ping-pong. */ -__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss_rw) = { +__visible DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw) = { .x86_tss = { /* * .sp0 is only used when entering ring 0 from a lower diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 8af2e8d0c0a1..145810b0edf6 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -906,9 +906,6 @@ void __init setup_arch(char **cmdline_p) set_bit(EFI_BOOT, &efi.flags); set_bit(EFI_64BIT, &efi.flags); } - - if (efi_enabled(EFI_BOOT)) - efi_memblock_x86_reserve_range(); #endif x86_init.oem.arch_setup(); @@ -962,6 +959,8 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); + if (efi_enabled(EFI_BOOT)) + efi_memblock_x86_reserve_range(); #ifdef CONFIG_MEMORY_HOTPLUG /* * Memory used by the kernel cannot be hot-removed because Linux diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c5970efa8557..ed556d50d7ed 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -126,14 +126,10 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) spin_lock_irqsave(&rtc_lock, flags); CMOS_WRITE(0xa, 0xf); spin_unlock_irqrestore(&rtc_lock, flags); - local_flush_tlb(); - pr_debug("1.\n"); *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = start_eip >> 4; - pr_debug("2.\n"); *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = start_eip & 0xf; - pr_debug("3.\n"); } static inline void smpboot_restore_warm_reset_vector(void) @@ -141,11 +137,6 @@ static inline void smpboot_restore_warm_reset_vector(void) unsigned long flags; /* - * Install writable page 0 entry to set BIOS data area. - */ - local_flush_tlb(); - - /* * Paranoid: Set warm reset code and vector here back * to default values. */ diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 77835bc021c7..093f2ea5dd56 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -102,7 +102,7 @@ __save_stack_trace_reliable(struct stack_trace *trace, for (unwind_start(&state, task, NULL, NULL); !unwind_done(&state); unwind_next_frame(&state)) { - regs = unwind_get_entry_regs(&state); + regs = unwind_get_entry_regs(&state, NULL); if (regs) { /* * Kernel mode registers on the stack indicate an @@ -164,8 +164,12 @@ int save_stack_trace_tsk_reliable(struct task_struct *tsk, { int ret; + /* + * If the task doesn't have a stack (e.g., a zombie), the stack is + * "reliably" empty. + */ if (!try_get_task_stack(tsk)) - return -EINVAL; + return 0; ret = __save_stack_trace_reliable(trace, tsk); diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index 9a9c9b076955..a5b802a12212 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -93,17 +93,10 @@ static void set_tls_desc(struct task_struct *p, int idx, cpu = get_cpu(); while (n-- > 0) { - if (LDT_empty(info) || LDT_zero(info)) { + if (LDT_empty(info) || LDT_zero(info)) memset(desc, 0, sizeof(*desc)); - } else { + else fill_ldt(desc, info); - - /* - * Always set the accessed bit so that the CPU - * doesn't try to write to the (read-only) GDT. - */ - desc->type |= 1; - } ++info; ++desc; } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index f69dbd47d733..446c9ef8cfc3 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -361,7 +361,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) * * No need for ist_enter here because we don't use RCU. */ - if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY && + if (((long)regs->sp >> P4D_SHIFT) == ESPFIX_PGD_ENTRY && regs->cs == __KERNEL_CS && regs->ip == (unsigned long)native_irq_return_iret) { diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index d2a8b5a24a44..1e413a9326aa 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -61,11 +61,17 @@ jiffies_64 = jiffies; . = ALIGN(HPAGE_SIZE); \ __end_rodata_hpage_align = .; +#define ALIGN_ENTRY_TEXT_BEGIN . = ALIGN(PMD_SIZE); +#define ALIGN_ENTRY_TEXT_END . = ALIGN(PMD_SIZE); + #else #define X64_ALIGN_RODATA_BEGIN #define X64_ALIGN_RODATA_END +#define ALIGN_ENTRY_TEXT_BEGIN +#define ALIGN_ENTRY_TEXT_END + #endif PHDRS { @@ -102,8 +108,10 @@ SECTIONS CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT + ALIGN_ENTRY_TEXT_BEGIN ENTRY_TEXT IRQENTRY_TEXT + ALIGN_ENTRY_TEXT_END SOFTIRQENTRY_TEXT *(.fixup) *(.gnu.warning) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index eb714f1cdf7e..bb31c801f1fc 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4986,6 +4986,25 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) "mov %%r14, %c[r14](%[svm]) \n\t" "mov %%r15, %c[r15](%[svm]) \n\t" #endif + /* + * Clear host registers marked as clobbered to prevent + * speculative use. + */ + "xor %%" _ASM_BX ", %%" _ASM_BX " \n\t" + "xor %%" _ASM_CX ", %%" _ASM_CX " \n\t" + "xor %%" _ASM_DX ", %%" _ASM_DX " \n\t" + "xor %%" _ASM_SI ", %%" _ASM_SI " \n\t" + "xor %%" _ASM_DI ", %%" _ASM_DI " \n\t" +#ifdef CONFIG_X86_64 + "xor %%r8, %%r8 \n\t" + "xor %%r9, %%r9 \n\t" + "xor %%r10, %%r10 \n\t" + "xor %%r11, %%r11 \n\t" + "xor %%r12, %%r12 \n\t" + "xor %%r13, %%r13 \n\t" + "xor %%r14, %%r14 \n\t" + "xor %%r15, %%r15 \n\t" +#endif "pop %%" _ASM_BP : : [svm]"a"(svm), diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 023afa0c8887..5c14d65f676a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9415,6 +9415,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) /* Save guest registers, load host registers, keep flags */ "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t" "pop %0 \n\t" + "setbe %c[fail](%0)\n\t" "mov %%" _ASM_AX ", %c[rax](%0) \n\t" "mov %%" _ASM_BX ", %c[rbx](%0) \n\t" __ASM_SIZE(pop) " %c[rcx](%0) \n\t" @@ -9431,12 +9432,23 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) "mov %%r13, %c[r13](%0) \n\t" "mov %%r14, %c[r14](%0) \n\t" "mov %%r15, %c[r15](%0) \n\t" + "xor %%r8d, %%r8d \n\t" + "xor %%r9d, %%r9d \n\t" + "xor %%r10d, %%r10d \n\t" + "xor %%r11d, %%r11d \n\t" + "xor %%r12d, %%r12d \n\t" + "xor %%r13d, %%r13d \n\t" + "xor %%r14d, %%r14d \n\t" + "xor %%r15d, %%r15d \n\t" #endif "mov %%cr2, %%" _ASM_AX " \n\t" "mov %%" _ASM_AX ", %c[cr2](%0) \n\t" + "xor %%eax, %%eax \n\t" + "xor %%ebx, %%ebx \n\t" + "xor %%esi, %%esi \n\t" + "xor %%edi, %%edi \n\t" "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t" - "setbe %c[fail](%0) \n\t" ".pushsection .rodata \n\t" ".global vmx_return \n\t" "vmx_return: " _ASM_PTR " 2b \n\t" diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 52195ee3f6d5..27e9e90a8d35 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -41,9 +41,10 @@ obj-$(CONFIG_AMD_NUMA) += amdtopology.o obj-$(CONFIG_ACPI_NUMA) += srat.o obj-$(CONFIG_NUMA_EMU) += numa_emulation.o -obj-$(CONFIG_X86_INTEL_MPX) += mpx.o -obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o -obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o +obj-$(CONFIG_X86_INTEL_MPX) += mpx.o +obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o +obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o +obj-$(CONFIG_PAGE_TABLE_ISOLATION) += pti.o obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt.o obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index fe814fd5e014..b9283cc27622 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -38,6 +38,32 @@ cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot) cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot); } +static void percpu_setup_debug_store(int cpu) +{ +#ifdef CONFIG_CPU_SUP_INTEL + int npages; + void *cea; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return; + + cea = &get_cpu_entry_area(cpu)->cpu_debug_store; + npages = sizeof(struct debug_store) / PAGE_SIZE; + BUILD_BUG_ON(sizeof(struct debug_store) % PAGE_SIZE != 0); + cea_map_percpu_pages(cea, &per_cpu(cpu_debug_store, cpu), npages, + PAGE_KERNEL); + + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers; + /* + * Force the population of PMDs for not yet allocated per cpu + * memory like debug store buffers. + */ + npages = sizeof(struct debug_store_buffers) / PAGE_SIZE; + for (; npages; npages--, cea += PAGE_SIZE) + cea_set_pte(cea, 0, PAGE_NONE); +#endif +} + /* Setup the fixmap mappings only once per-processor */ static void __init setup_cpu_entry_area(int cpu) { @@ -109,6 +135,7 @@ static void __init setup_cpu_entry_area(int cpu) cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline, __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX); #endif + percpu_setup_debug_store(cpu); } static __init void setup_cpu_entry_area_ptes(void) diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c index bfcffdf6c577..421f2664ffa0 100644 --- a/arch/x86/mm/debug_pagetables.c +++ b/arch/x86/mm/debug_pagetables.c @@ -5,7 +5,7 @@ static int ptdump_show(struct seq_file *m, void *v) { - ptdump_walk_pgd_level(m, NULL); + ptdump_walk_pgd_level_debugfs(m, NULL, false); return 0; } @@ -22,21 +22,89 @@ static const struct file_operations ptdump_fops = { .release = single_release, }; -static struct dentry *pe; +static int ptdump_show_curknl(struct seq_file *m, void *v) +{ + if (current->mm->pgd) { + down_read(¤t->mm->mmap_sem); + ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, false); + up_read(¤t->mm->mmap_sem); + } + return 0; +} + +static int ptdump_open_curknl(struct inode *inode, struct file *filp) +{ + return single_open(filp, ptdump_show_curknl, NULL); +} + +static const struct file_operations ptdump_curknl_fops = { + .owner = THIS_MODULE, + .open = ptdump_open_curknl, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +#ifdef CONFIG_PAGE_TABLE_ISOLATION +static struct dentry *pe_curusr; + +static int ptdump_show_curusr(struct seq_file *m, void *v) +{ + if (current->mm->pgd) { + down_read(¤t->mm->mmap_sem); + ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, true); + up_read(¤t->mm->mmap_sem); + } + return 0; +} + +static int ptdump_open_curusr(struct inode *inode, struct file *filp) +{ + return single_open(filp, ptdump_show_curusr, NULL); +} + +static const struct file_operations ptdump_curusr_fops = { + .owner = THIS_MODULE, + .open = ptdump_open_curusr, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +#endif + +static struct dentry *dir, *pe_knl, *pe_curknl; static int __init pt_dump_debug_init(void) { - pe = debugfs_create_file("kernel_page_tables", S_IRUSR, NULL, NULL, - &ptdump_fops); - if (!pe) + dir = debugfs_create_dir("page_tables", NULL); + if (!dir) return -ENOMEM; + pe_knl = debugfs_create_file("kernel", 0400, dir, NULL, + &ptdump_fops); + if (!pe_knl) + goto err; + + pe_curknl = debugfs_create_file("current_kernel", 0400, + dir, NULL, &ptdump_curknl_fops); + if (!pe_curknl) + goto err; + +#ifdef CONFIG_PAGE_TABLE_ISOLATION + pe_curusr = debugfs_create_file("current_user", 0400, + dir, NULL, &ptdump_curusr_fops); + if (!pe_curusr) + goto err; +#endif return 0; +err: + debugfs_remove_recursive(dir); + return -ENOMEM; } static void __exit pt_dump_debug_exit(void) { - debugfs_remove_recursive(pe); + debugfs_remove_recursive(dir); } module_init(pt_dump_debug_init); diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 43dedbfb7257..2a4849e92831 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -52,6 +52,9 @@ enum address_markers_idx { USER_SPACE_NR = 0, KERNEL_SPACE_NR, LOW_KERNEL_NR, +#if defined(CONFIG_MODIFY_LDT_SYSCALL) && defined(CONFIG_X86_5LEVEL) + LDT_NR, +#endif VMALLOC_START_NR, VMEMMAP_START_NR, #ifdef CONFIG_KASAN @@ -59,6 +62,9 @@ enum address_markers_idx { KASAN_SHADOW_END_NR, #endif CPU_ENTRY_AREA_NR, +#if defined(CONFIG_MODIFY_LDT_SYSCALL) && !defined(CONFIG_X86_5LEVEL) + LDT_NR, +#endif #ifdef CONFIG_X86_ESPFIX64 ESPFIX_START_NR, #endif @@ -82,6 +88,9 @@ static struct addr_marker address_markers[] = { [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" }, [KASAN_SHADOW_END_NR] = { KASAN_SHADOW_END, "KASAN shadow end" }, #endif +#ifdef CONFIG_MODIFY_LDT_SYSCALL + [LDT_NR] = { LDT_BASE_ADDR, "LDT remap" }, +#endif [CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE,"CPU entry Area" }, #ifdef CONFIG_X86_ESPFIX64 [ESPFIX_START_NR] = { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, @@ -467,7 +476,7 @@ static inline bool is_hypervisor_range(int idx) } static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, - bool checkwx) + bool checkwx, bool dmesg) { #ifdef CONFIG_X86_64 pgd_t *start = (pgd_t *) &init_top_pgt; @@ -480,7 +489,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, if (pgd) { start = pgd; - st.to_dmesg = true; + st.to_dmesg = dmesg; } st.check_wx = checkwx; @@ -518,13 +527,37 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) { - ptdump_walk_pgd_level_core(m, pgd, false); + ptdump_walk_pgd_level_core(m, pgd, false, true); +} + +void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user) +{ +#ifdef CONFIG_PAGE_TABLE_ISOLATION + if (user && static_cpu_has(X86_FEATURE_PTI)) + pgd = kernel_to_user_pgdp(pgd); +#endif + ptdump_walk_pgd_level_core(m, pgd, false, false); +} +EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs); + +static void ptdump_walk_user_pgd_level_checkwx(void) +{ +#ifdef CONFIG_PAGE_TABLE_ISOLATION + pgd_t *pgd = (pgd_t *) &init_top_pgt; + + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + + pr_info("x86/mm: Checking user space page tables\n"); + pgd = kernel_to_user_pgdp(pgd); + ptdump_walk_pgd_level_core(NULL, pgd, true, false); +#endif } -EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level); void ptdump_walk_pgd_level_checkwx(void) { - ptdump_walk_pgd_level_core(NULL, NULL, true); + ptdump_walk_pgd_level_core(NULL, NULL, true, false); + ptdump_walk_user_pgd_level_checkwx(); } static int __init pt_dump_init(void) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 6fdf91ef130a..82f5252c723a 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -20,6 +20,7 @@ #include <asm/kaslr.h> #include <asm/hypervisor.h> #include <asm/cpufeature.h> +#include <asm/pti.h> /* * We need to define the tracepoints somewhere, and tlb.c @@ -160,6 +161,12 @@ struct map_range { static int page_size_mask; +static void enable_global_pages(void) +{ + if (!static_cpu_has(X86_FEATURE_PTI)) + __supported_pte_mask |= _PAGE_GLOBAL; +} + static void __init probe_page_size_mask(void) { /* @@ -177,11 +184,11 @@ static void __init probe_page_size_mask(void) cr4_set_bits_and_update_boot(X86_CR4_PSE); /* Enable PGE if available */ + __supported_pte_mask &= ~_PAGE_GLOBAL; if (boot_cpu_has(X86_FEATURE_PGE)) { cr4_set_bits_and_update_boot(X86_CR4_PGE); - __supported_pte_mask |= _PAGE_GLOBAL; - } else - __supported_pte_mask &= ~_PAGE_GLOBAL; + enable_global_pages(); + } /* Enable 1 GB linear kernel mappings if available: */ if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) { @@ -194,34 +201,44 @@ static void __init probe_page_size_mask(void) static void setup_pcid(void) { -#ifdef CONFIG_X86_64 - if (boot_cpu_has(X86_FEATURE_PCID)) { - if (boot_cpu_has(X86_FEATURE_PGE)) { - /* - * This can't be cr4_set_bits_and_update_boot() -- - * the trampoline code can't handle CR4.PCIDE and - * it wouldn't do any good anyway. Despite the name, - * cr4_set_bits_and_update_boot() doesn't actually - * cause the bits in question to remain set all the - * way through the secondary boot asm. - * - * Instead, we brute-force it and set CR4.PCIDE - * manually in start_secondary(). - */ - cr4_set_bits(X86_CR4_PCIDE); - } else { - /* - * flush_tlb_all(), as currently implemented, won't - * work if PCID is on but PGE is not. Since that - * combination doesn't exist on real hardware, there's - * no reason to try to fully support it, but it's - * polite to avoid corrupting data if we're on - * an improperly configured VM. - */ - setup_clear_cpu_cap(X86_FEATURE_PCID); - } + if (!IS_ENABLED(CONFIG_X86_64)) + return; + + if (!boot_cpu_has(X86_FEATURE_PCID)) + return; + + if (boot_cpu_has(X86_FEATURE_PGE)) { + /* + * This can't be cr4_set_bits_and_update_boot() -- the + * trampoline code can't handle CR4.PCIDE and it wouldn't + * do any good anyway. Despite the name, + * cr4_set_bits_and_update_boot() doesn't actually cause + * the bits in question to remain set all the way through + * the secondary boot asm. + * + * Instead, we brute-force it and set CR4.PCIDE manually in + * start_secondary(). + */ + cr4_set_bits(X86_CR4_PCIDE); + + /* + * INVPCID's single-context modes (2/3) only work if we set + * X86_CR4_PCIDE, *and* we INVPCID support. It's unusable + * on systems that have X86_CR4_PCIDE clear, or that have + * no INVPCID support at all. + */ + if (boot_cpu_has(X86_FEATURE_INVPCID)) + setup_force_cpu_cap(X86_FEATURE_INVPCID_SINGLE); + } else { + /* + * flush_tlb_all(), as currently implemented, won't work if + * PCID is on but PGE is not. Since that combination + * doesn't exist on real hardware, there's no reason to try + * to fully support it, but it's polite to avoid corrupting + * data if we're on an improperly configured VM. + */ + setup_clear_cpu_cap(X86_FEATURE_PCID); } -#endif } #ifdef CONFIG_X86_32 @@ -622,6 +639,7 @@ void __init init_mem_mapping(void) { unsigned long end; + pti_check_boottime_disable(); probe_page_size_mask(); setup_pcid(); @@ -845,12 +863,12 @@ void __init zone_sizes_init(void) free_area_init_nodes(max_zone_pfns); } -DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { +__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { .loaded_mm = &init_mm, .next_asid = 1, .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */ }; -EXPORT_SYMBOL_GPL(cpu_tlbstate); +EXPORT_PER_CPU_SYMBOL(cpu_tlbstate); void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache) { diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 879ef930e2c2..aedebd2ebf1e 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -34,25 +34,14 @@ #define TB_SHIFT 40 /* - * Virtual address start and end range for randomization. The end changes base - * on configuration to have the highest amount of space for randomization. - * It increases the possible random position for each randomized region. + * Virtual address start and end range for randomization. * - * You need to add an if/def entry if you introduce a new memory region - * compatible with KASLR. Your entry must be in logical order with memory - * layout. For example, ESPFIX is before EFI because its virtual address is - * before. You also need to add a BUILD_BUG_ON() in kernel_randomize_memory() to - * ensure that this order is correct and won't be changed. + * The end address could depend on more configuration options to make the + * highest amount of space for randomization available, but that's too hard + * to keep straight and caused issues already. */ static const unsigned long vaddr_start = __PAGE_OFFSET_BASE; - -#if defined(CONFIG_X86_ESPFIX64) -static const unsigned long vaddr_end = ESPFIX_BASE_ADDR; -#elif defined(CONFIG_EFI) -static const unsigned long vaddr_end = EFI_VA_END; -#else -static const unsigned long vaddr_end = __START_KERNEL_map; -#endif +static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE; /* Default values */ unsigned long page_offset_base = __PAGE_OFFSET_BASE; @@ -101,15 +90,12 @@ void __init kernel_randomize_memory(void) unsigned long remain_entropy; /* - * All these BUILD_BUG_ON checks ensures the memory layout is - * consistent with the vaddr_start/vaddr_end variables. + * These BUILD_BUG_ON checks ensure the memory layout is consistent + * with the vaddr_start/vaddr_end variables. These checks are very + * limited.... */ BUILD_BUG_ON(vaddr_start >= vaddr_end); - BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) && - vaddr_end >= EFI_VA_END); - BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) || - IS_ENABLED(CONFIG_EFI)) && - vaddr_end >= __START_KERNEL_map); + BUILD_BUG_ON(vaddr_end != CPU_ENTRY_AREA_BASE); BUILD_BUG_ON(vaddr_end > __START_KERNEL_map); if (!kaslr_memory_enabled()) diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index d9a9e9fc75dd..391b13402e40 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -405,13 +405,13 @@ bool sme_active(void) { return sme_me_mask && !sev_enabled; } -EXPORT_SYMBOL_GPL(sme_active); +EXPORT_SYMBOL(sme_active); bool sev_active(void) { return sme_me_mask && sev_enabled; } -EXPORT_SYMBOL_GPL(sev_active); +EXPORT_SYMBOL(sev_active); static const struct dma_map_ops sev_dma_ops = { .alloc = sev_alloc, diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 96d456a94b03..004abf9ebf12 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -355,14 +355,15 @@ static inline void _pgd_free(pgd_t *pgd) kmem_cache_free(pgd_cache, pgd); } #else + static inline pgd_t *_pgd_alloc(void) { - return (pgd_t *)__get_free_page(PGALLOC_GFP); + return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER); } static inline void _pgd_free(pgd_t *pgd) { - free_page((unsigned long)pgd); + free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER); } #endif /* CONFIG_X86_PAE */ diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c new file mode 100644 index 000000000000..43d4a4a29037 --- /dev/null +++ b/arch/x86/mm/pti.c @@ -0,0 +1,388 @@ +/* + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * This code is based in part on work published here: + * + * https://github.com/IAIK/KAISER + * + * The original work was written by and and signed off by for the Linux + * kernel by: + * + * Signed-off-by: Richard Fellner <richard.fellner@student.tugraz.at> + * Signed-off-by: Moritz Lipp <moritz.lipp@iaik.tugraz.at> + * Signed-off-by: Daniel Gruss <daniel.gruss@iaik.tugraz.at> + * Signed-off-by: Michael Schwarz <michael.schwarz@iaik.tugraz.at> + * + * Major changes to the original code by: Dave Hansen <dave.hansen@intel.com> + * Mostly rewritten by Thomas Gleixner <tglx@linutronix.de> and + * Andy Lutomirsky <luto@amacapital.net> + */ +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/bug.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/mm.h> +#include <linux/uaccess.h> + +#include <asm/cpufeature.h> +#include <asm/hypervisor.h> +#include <asm/vsyscall.h> +#include <asm/cmdline.h> +#include <asm/pti.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/tlbflush.h> +#include <asm/desc.h> + +#undef pr_fmt +#define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt + +/* Backporting helper */ +#ifndef __GFP_NOTRACK +#define __GFP_NOTRACK 0 +#endif + +static void __init pti_print_if_insecure(const char *reason) +{ + if (boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) + pr_info("%s\n", reason); +} + +static void __init pti_print_if_secure(const char *reason) +{ + if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) + pr_info("%s\n", reason); +} + +void __init pti_check_boottime_disable(void) +{ + char arg[5]; + int ret; + + if (hypervisor_is_type(X86_HYPER_XEN_PV)) { + pti_print_if_insecure("disabled on XEN PV."); + return; + } + + ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg)); + if (ret > 0) { + if (ret == 3 && !strncmp(arg, "off", 3)) { + pti_print_if_insecure("disabled on command line."); + return; + } + if (ret == 2 && !strncmp(arg, "on", 2)) { + pti_print_if_secure("force enabled on command line."); + goto enable; + } + if (ret == 4 && !strncmp(arg, "auto", 4)) + goto autosel; + } + + if (cmdline_find_option_bool(boot_command_line, "nopti")) { + pti_print_if_insecure("disabled on command line."); + return; + } + +autosel: + if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) + return; +enable: + setup_force_cpu_cap(X86_FEATURE_PTI); +} + +pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) +{ + /* + * Changes to the high (kernel) portion of the kernelmode page + * tables are not automatically propagated to the usermode tables. + * + * Users should keep in mind that, unlike the kernelmode tables, + * there is no vmalloc_fault equivalent for the usermode tables. + * Top-level entries added to init_mm's usermode pgd after boot + * will not be automatically propagated to other mms. + */ + if (!pgdp_maps_userspace(pgdp)) + return pgd; + + /* + * The user page tables get the full PGD, accessible from + * userspace: + */ + kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd; + + /* + * If this is normal user memory, make it NX in the kernel + * pagetables so that, if we somehow screw up and return to + * usermode with the kernel CR3 loaded, we'll get a page fault + * instead of allowing user code to execute with the wrong CR3. + * + * As exceptions, we don't set NX if: + * - _PAGE_USER is not set. This could be an executable + * EFI runtime mapping or something similar, and the kernel + * may execute from it + * - we don't have NX support + * - we're clearing the PGD (i.e. the new pgd is not present). + */ + if ((pgd.pgd & (_PAGE_USER|_PAGE_PRESENT)) == (_PAGE_USER|_PAGE_PRESENT) && + (__supported_pte_mask & _PAGE_NX)) + pgd.pgd |= _PAGE_NX; + + /* return the copy of the PGD we want the kernel to use: */ + return pgd; +} + +/* + * Walk the user copy of the page tables (optionally) trying to allocate + * page table pages on the way down. + * + * Returns a pointer to a P4D on success, or NULL on failure. + */ +static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address) +{ + pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address)); + gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); + + if (address < PAGE_OFFSET) { + WARN_ONCE(1, "attempt to walk user address\n"); + return NULL; + } + + if (pgd_none(*pgd)) { + unsigned long new_p4d_page = __get_free_page(gfp); + if (!new_p4d_page) + return NULL; + + if (pgd_none(*pgd)) { + set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page))); + new_p4d_page = 0; + } + if (new_p4d_page) + free_page(new_p4d_page); + } + BUILD_BUG_ON(pgd_large(*pgd) != 0); + + return p4d_offset(pgd, address); +} + +/* + * Walk the user copy of the page tables (optionally) trying to allocate + * page table pages on the way down. + * + * Returns a pointer to a PMD on success, or NULL on failure. + */ +static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) +{ + gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); + p4d_t *p4d = pti_user_pagetable_walk_p4d(address); + pud_t *pud; + + BUILD_BUG_ON(p4d_large(*p4d) != 0); + if (p4d_none(*p4d)) { + unsigned long new_pud_page = __get_free_page(gfp); + if (!new_pud_page) + return NULL; + + if (p4d_none(*p4d)) { + set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page))); + new_pud_page = 0; + } + if (new_pud_page) + free_page(new_pud_page); + } + + pud = pud_offset(p4d, address); + /* The user page tables do not use large mappings: */ + if (pud_large(*pud)) { + WARN_ON(1); + return NULL; + } + if (pud_none(*pud)) { + unsigned long new_pmd_page = __get_free_page(gfp); + if (!new_pmd_page) + return NULL; + + if (pud_none(*pud)) { + set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page))); + new_pmd_page = 0; + } + if (new_pmd_page) + free_page(new_pmd_page); + } + + return pmd_offset(pud, address); +} + +#ifdef CONFIG_X86_VSYSCALL_EMULATION +/* + * Walk the shadow copy of the page tables (optionally) trying to allocate + * page table pages on the way down. Does not support large pages. + * + * Note: this is only used when mapping *new* kernel data into the + * user/shadow page tables. It is never used for userspace data. + * + * Returns a pointer to a PTE on success, or NULL on failure. + */ +static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address) +{ + gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); + pmd_t *pmd = pti_user_pagetable_walk_pmd(address); + pte_t *pte; + + /* We can't do anything sensible if we hit a large mapping. */ + if (pmd_large(*pmd)) { + WARN_ON(1); + return NULL; + } + + if (pmd_none(*pmd)) { + unsigned long new_pte_page = __get_free_page(gfp); + if (!new_pte_page) + return NULL; + + if (pmd_none(*pmd)) { + set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page))); + new_pte_page = 0; + } + if (new_pte_page) + free_page(new_pte_page); + } + + pte = pte_offset_kernel(pmd, address); + if (pte_flags(*pte) & _PAGE_USER) { + WARN_ONCE(1, "attempt to walk to user pte\n"); + return NULL; + } + return pte; +} + +static void __init pti_setup_vsyscall(void) +{ + pte_t *pte, *target_pte; + unsigned int level; + + pte = lookup_address(VSYSCALL_ADDR, &level); + if (!pte || WARN_ON(level != PG_LEVEL_4K) || pte_none(*pte)) + return; + + target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR); + if (WARN_ON(!target_pte)) + return; + + *target_pte = *pte; + set_vsyscall_pgtable_user_bits(kernel_to_user_pgdp(swapper_pg_dir)); +} +#else +static void __init pti_setup_vsyscall(void) { } +#endif + +static void __init +pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear) +{ + unsigned long addr; + + /* + * Clone the populated PMDs which cover start to end. These PMD areas + * can have holes. + */ + for (addr = start; addr < end; addr += PMD_SIZE) { + pmd_t *pmd, *target_pmd; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + + pgd = pgd_offset_k(addr); + if (WARN_ON(pgd_none(*pgd))) + return; + p4d = p4d_offset(pgd, addr); + if (WARN_ON(p4d_none(*p4d))) + return; + pud = pud_offset(p4d, addr); + if (pud_none(*pud)) + continue; + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + continue; + + target_pmd = pti_user_pagetable_walk_pmd(addr); + if (WARN_ON(!target_pmd)) + return; + + /* + * Copy the PMD. That is, the kernelmode and usermode + * tables will share the last-level page tables of this + * address range + */ + *target_pmd = pmd_clear_flags(*pmd, clear); + } +} + +/* + * Clone a single p4d (i.e. a top-level entry on 4-level systems and a + * next-level entry on 5-level systems. + */ +static void __init pti_clone_p4d(unsigned long addr) +{ + p4d_t *kernel_p4d, *user_p4d; + pgd_t *kernel_pgd; + + user_p4d = pti_user_pagetable_walk_p4d(addr); + kernel_pgd = pgd_offset_k(addr); + kernel_p4d = p4d_offset(kernel_pgd, addr); + *user_p4d = *kernel_p4d; +} + +/* + * Clone the CPU_ENTRY_AREA into the user space visible page table. + */ +static void __init pti_clone_user_shared(void) +{ + pti_clone_p4d(CPU_ENTRY_AREA_BASE); +} + +/* + * Clone the ESPFIX P4D into the user space visinble page table + */ +static void __init pti_setup_espfix64(void) +{ +#ifdef CONFIG_X86_ESPFIX64 + pti_clone_p4d(ESPFIX_BASE_ADDR); +#endif +} + +/* + * Clone the populated PMDs of the entry and irqentry text and force it RO. + */ +static void __init pti_clone_entry_text(void) +{ + pti_clone_pmds((unsigned long) __entry_text_start, + (unsigned long) __irqentry_text_end, + _PAGE_RW | _PAGE_GLOBAL); +} + +/* + * Initialize kernel page table isolation + */ +void __init pti_init(void) +{ + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + + pr_info("enabled\n"); + + pti_clone_user_shared(); + pti_clone_entry_text(); + pti_setup_espfix64(); + pti_setup_vsyscall(); +} diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 0a1be3adc97e..a1561957dccb 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -28,6 +28,38 @@ * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi */ +/* + * We get here when we do something requiring a TLB invalidation + * but could not go invalidate all of the contexts. We do the + * necessary invalidation by clearing out the 'ctx_id' which + * forces a TLB flush when the context is loaded. + */ +void clear_asid_other(void) +{ + u16 asid; + + /* + * This is only expected to be set if we have disabled + * kernel _PAGE_GLOBAL pages. + */ + if (!static_cpu_has(X86_FEATURE_PTI)) { + WARN_ON_ONCE(1); + return; + } + + for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) { + /* Do not need to flush the current asid */ + if (asid == this_cpu_read(cpu_tlbstate.loaded_mm_asid)) + continue; + /* + * Make sure the next time we go to switch to + * this asid, we do a flush: + */ + this_cpu_write(cpu_tlbstate.ctxs[asid].ctx_id, 0); + } + this_cpu_write(cpu_tlbstate.invalidate_other, false); +} + atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1); @@ -42,6 +74,9 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, return; } + if (this_cpu_read(cpu_tlbstate.invalidate_other)) + clear_asid_other(); + for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) { if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) != next->context.ctx_id) @@ -65,6 +100,25 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, *need_flush = true; } +static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush) +{ + unsigned long new_mm_cr3; + + if (need_flush) { + invalidate_user_asid(new_asid); + new_mm_cr3 = build_cr3(pgdir, new_asid); + } else { + new_mm_cr3 = build_cr3_noflush(pgdir, new_asid); + } + + /* + * Caution: many callers of this function expect + * that load_cr3() is serializing and orders TLB + * fills with respect to the mm_cpumask writes. + */ + write_cr3(new_mm_cr3); +} + void leave_mm(int cpu) { struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm); @@ -195,7 +249,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, if (need_flush) { this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); - write_cr3(build_cr3(next->pgd, new_asid)); + load_new_mm_cr3(next->pgd, new_asid, true); /* * NB: This gets called via leave_mm() in the idle path @@ -208,7 +262,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); } else { /* The new ASID is already up to date. */ - write_cr3(build_cr3_noflush(next->pgd, new_asid)); + load_new_mm_cr3(next->pgd, new_asid, false); /* See above wrt _rcuidle. */ trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0); diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 6a151ce70e86..d87ac96e37ed 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -196,6 +196,9 @@ static pgd_t *efi_pgd; * because we want to avoid inserting EFI region mappings (EFI_VA_END * to EFI_VA_START) into the standard kernel page tables. Everything * else can be shared, see efi_sync_low_kernel_mappings(). + * + * We don't want the pgd on the pgd_list and cannot use pgd_alloc() for the + * allocation. */ int __init efi_alloc_page_tables(void) { @@ -208,7 +211,7 @@ int __init efi_alloc_page_tables(void) return 0; gfp_mask = GFP_KERNEL | __GFP_ZERO; - efi_pgd = (pgd_t *)__get_free_page(gfp_mask); + efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER); if (!efi_pgd) return -ENOMEM; diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 8a99a2e96537..5b513ccffde4 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -592,7 +592,18 @@ static int qrk_capsule_setup_info(struct capsule_info *cap_info, void **pkbuff, /* * Update the first page pointer to skip over the CSH header. */ - cap_info->pages[0] += csh->headersize; + cap_info->phys[0] += csh->headersize; + + /* + * cap_info->capsule should point at a virtual mapping of the entire + * capsule, starting at the capsule header. Our image has the Quark + * security header prepended, so we cannot rely on the default vmap() + * mapping created by the generic capsule code. + * Given that the Quark firmware does not appear to care about the + * virtual mapping, let's just point cap_info->capsule at our copy + * of the capsule header. + */ + cap_info->capsule = &cap_info->header; return 1; } diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c index 5f6fd860820a..e4cb9f4cde8a 100644 --- a/arch/x86/platform/uv/uv_irq.c +++ b/arch/x86/platform/uv/uv_irq.c @@ -128,7 +128,7 @@ static void uv_domain_free(struct irq_domain *domain, unsigned int virq, * on the specified blade to allow the sending of MSIs to the specified CPU. */ static int uv_domain_activate(struct irq_domain *domain, - struct irq_data *irq_data, bool early) + struct irq_data *irq_data, bool reserve) { uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data); return 0; diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 444a387df219..35d4dcea381f 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -664,7 +664,7 @@ void af_alg_free_areq_sgls(struct af_alg_async_req *areq) unsigned int i; list_for_each_entry_safe(rsgl, tmp, &areq->rsgl_list, list) { - ctx->rcvused -= rsgl->sg_num_bytes; + atomic_sub(rsgl->sg_num_bytes, &ctx->rcvused); af_alg_free_sg(&rsgl->sgl); list_del(&rsgl->list); if (rsgl != &areq->first_rsgl) @@ -1163,7 +1163,7 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags, areq->last_rsgl = rsgl; len += err; - ctx->rcvused += err; + atomic_add(err, &ctx->rcvused); rsgl->sg_num_bytes = err; iov_iter_advance(&msg->msg_iter, err); } diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index ddcc45f77edd..e9885a35ef6e 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -571,7 +571,7 @@ static int aead_accept_parent_nokey(void *private, struct sock *sk) INIT_LIST_HEAD(&ctx->tsgl_list); ctx->len = len; ctx->used = 0; - ctx->rcvused = 0; + atomic_set(&ctx->rcvused, 0); ctx->more = 0; ctx->merge = 0; ctx->enc = 0; diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index baef9bfccdda..c5c47b680152 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -390,7 +390,7 @@ static int skcipher_accept_parent_nokey(void *private, struct sock *sk) INIT_LIST_HEAD(&ctx->tsgl_list); ctx->len = len; ctx->used = 0; - ctx->rcvused = 0; + atomic_set(&ctx->rcvused, 0); ctx->more = 0; ctx->merge = 0; ctx->enc = 0; diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c index db1bc3147bc4..600afa99941f 100644 --- a/crypto/chacha20poly1305.c +++ b/crypto/chacha20poly1305.c @@ -610,6 +610,11 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb, algt->mask)); if (IS_ERR(poly)) return PTR_ERR(poly); + poly_hash = __crypto_hash_alg_common(poly); + + err = -EINVAL; + if (poly_hash->digestsize != POLY1305_DIGEST_SIZE) + goto out_put_poly; err = -ENOMEM; inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); @@ -618,7 +623,6 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb, ctx = aead_instance_ctx(inst); ctx->saltlen = CHACHAPOLY_IV_SIZE - ivsize; - poly_hash = __crypto_hash_alg_common(poly); err = crypto_init_ahash_spawn(&ctx->poly, poly_hash, aead_crypto_instance(inst)); if (err) diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index ee9cfb99fe25..f8ec3d4ba4a8 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c @@ -254,6 +254,14 @@ static void pcrypt_aead_exit_tfm(struct crypto_aead *tfm) crypto_free_aead(ctx->child); } +static void pcrypt_free(struct aead_instance *inst) +{ + struct pcrypt_instance_ctx *ctx = aead_instance_ctx(inst); + + crypto_drop_aead(&ctx->spawn); + kfree(inst); +} + static int pcrypt_init_instance(struct crypto_instance *inst, struct crypto_alg *alg) { @@ -319,6 +327,8 @@ static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb, inst->alg.encrypt = pcrypt_aead_encrypt; inst->alg.decrypt = pcrypt_aead_decrypt; + inst->free = pcrypt_free; + err = aead_register_instance(tmpl, inst); if (err) goto out_drop_aead; @@ -349,14 +359,6 @@ static int pcrypt_create(struct crypto_template *tmpl, struct rtattr **tb) return -EINVAL; } -static void pcrypt_free(struct crypto_instance *inst) -{ - struct pcrypt_instance_ctx *ctx = crypto_instance_ctx(inst); - - crypto_drop_aead(&ctx->spawn); - kfree(inst); -} - static int pcrypt_cpumask_change_notify(struct notifier_block *self, unsigned long val, void *data) { @@ -469,7 +471,6 @@ static void pcrypt_fini_padata(struct padata_pcrypt *pcrypt) static struct crypto_template pcrypt_tmpl = { .name = "pcrypt", .create = pcrypt_create, - .free = pcrypt_free, .module = THIS_MODULE, }; diff --git a/drivers/android/binder.c b/drivers/android/binder.c index bccec9de0533..a7ecfde66b7b 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -482,7 +482,8 @@ enum binder_deferred_state { * @tsk task_struct for group_leader of process * (invariant after initialized) * @files files_struct for process - * (invariant after initialized) + * (protected by @files_lock) + * @files_lock mutex to protect @files * @deferred_work_node: element for binder_deferred_list * (protected by binder_deferred_lock) * @deferred_work: bitmap of deferred work to perform @@ -530,6 +531,7 @@ struct binder_proc { int pid; struct task_struct *tsk; struct files_struct *files; + struct mutex files_lock; struct hlist_node deferred_work_node; int deferred_work; bool is_dead; @@ -877,20 +879,26 @@ static void binder_inc_node_tmpref_ilocked(struct binder_node *node); static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) { - struct files_struct *files = proc->files; unsigned long rlim_cur; unsigned long irqs; + int ret; - if (files == NULL) - return -ESRCH; - - if (!lock_task_sighand(proc->tsk, &irqs)) - return -EMFILE; - + mutex_lock(&proc->files_lock); + if (proc->files == NULL) { + ret = -ESRCH; + goto err; + } + if (!lock_task_sighand(proc->tsk, &irqs)) { + ret = -EMFILE; + goto err; + } rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE); unlock_task_sighand(proc->tsk, &irqs); - return __alloc_fd(files, 0, rlim_cur, flags); + ret = __alloc_fd(proc->files, 0, rlim_cur, flags); +err: + mutex_unlock(&proc->files_lock); + return ret; } /* @@ -899,8 +907,10 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) static void task_fd_install( struct binder_proc *proc, unsigned int fd, struct file *file) { + mutex_lock(&proc->files_lock); if (proc->files) __fd_install(proc->files, fd, file); + mutex_unlock(&proc->files_lock); } /* @@ -910,9 +920,11 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd) { int retval; - if (proc->files == NULL) - return -ESRCH; - + mutex_lock(&proc->files_lock); + if (proc->files == NULL) { + retval = -ESRCH; + goto err; + } retval = __close_fd(proc->files, fd); /* can't restart close syscall because file table entry was cleared */ if (unlikely(retval == -ERESTARTSYS || @@ -920,7 +932,8 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd) retval == -ERESTARTNOHAND || retval == -ERESTART_RESTARTBLOCK)) retval = -EINTR; - +err: + mutex_unlock(&proc->files_lock); return retval; } @@ -4627,7 +4640,9 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) ret = binder_alloc_mmap_handler(&proc->alloc, vma); if (ret) return ret; + mutex_lock(&proc->files_lock); proc->files = get_files_struct(current); + mutex_unlock(&proc->files_lock); return 0; err_bad_arg: @@ -4651,6 +4666,7 @@ static int binder_open(struct inode *nodp, struct file *filp) spin_lock_init(&proc->outer_lock); get_task_struct(current->group_leader); proc->tsk = current->group_leader; + mutex_init(&proc->files_lock); INIT_LIST_HEAD(&proc->todo); proc->default_priority = task_nice(current); binder_dev = container_of(filp->private_data, struct binder_device, @@ -4903,9 +4919,11 @@ static void binder_deferred_func(struct work_struct *work) files = NULL; if (defer & BINDER_DEFERRED_PUT_FILES) { + mutex_lock(&proc->files_lock); files = proc->files; if (files) proc->files = NULL; + mutex_unlock(&proc->files_lock); } if (defer & BINDER_DEFERRED_FLUSH) diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index eb3af2739537..07532d83be0b 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -186,6 +186,11 @@ static void cache_associativity(struct cacheinfo *this_leaf) this_leaf->ways_of_associativity = (size / nr_sets) / line_size; } +static bool cache_node_is_unified(struct cacheinfo *this_leaf) +{ + return of_property_read_bool(this_leaf->of_node, "cache-unified"); +} + static void cache_of_override_properties(unsigned int cpu) { int index; @@ -194,6 +199,14 @@ static void cache_of_override_properties(unsigned int cpu) for (index = 0; index < cache_leaves(cpu); index++) { this_leaf = this_cpu_ci->info_list + index; + /* + * init_cache_level must setup the cache level correctly + * overriding the architecturally specified levels, so + * if type is NONE at this stage, it should be unified + */ + if (this_leaf->type == CACHE_TYPE_NOCACHE && + cache_node_is_unified(this_leaf)) + this_leaf->type = CACHE_TYPE_UNIFIED; cache_size(this_leaf); cache_get_line_size(this_leaf); cache_nr_sets(this_leaf); diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c index 328ca93781cf..1b76d9585902 100644 --- a/drivers/bus/sunxi-rsb.c +++ b/drivers/bus/sunxi-rsb.c @@ -178,6 +178,7 @@ static struct bus_type sunxi_rsb_bus = { .match = sunxi_rsb_device_match, .probe = sunxi_rsb_device_probe, .remove = sunxi_rsb_device_remove, + .uevent = of_device_uevent_modalias, }; static void sunxi_rsb_dev_release(struct device *dev) diff --git a/drivers/crypto/chelsio/Kconfig b/drivers/crypto/chelsio/Kconfig index 3e104f5aa0c2..b56b3f711d94 100644 --- a/drivers/crypto/chelsio/Kconfig +++ b/drivers/crypto/chelsio/Kconfig @@ -5,6 +5,7 @@ config CRYPTO_DEV_CHELSIO select CRYPTO_SHA256 select CRYPTO_SHA512 select CRYPTO_AUTHENC + select CRYPTO_GF128MUL ---help--- The Chelsio Crypto Co-processor driver for T6 adapters. diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c index 89ba9e85c0f3..4bcef78a08aa 100644 --- a/drivers/crypto/inside-secure/safexcel.c +++ b/drivers/crypto/inside-secure/safexcel.c @@ -607,6 +607,7 @@ static inline void safexcel_handle_result_descriptor(struct safexcel_crypto_priv ndesc = ctx->handle_result(priv, ring, sreq->req, &should_complete, &ret); if (ndesc < 0) { + kfree(sreq); dev_err(priv->dev, "failed to handle result (%d)", ndesc); return; } diff --git a/drivers/crypto/inside-secure/safexcel_cipher.c b/drivers/crypto/inside-secure/safexcel_cipher.c index 5438552bc6d7..fcc0a606d748 100644 --- a/drivers/crypto/inside-secure/safexcel_cipher.c +++ b/drivers/crypto/inside-secure/safexcel_cipher.c @@ -14,6 +14,7 @@ #include <crypto/aes.h> #include <crypto/skcipher.h> +#include <crypto/internal/skcipher.h> #include "safexcel.h" @@ -33,6 +34,10 @@ struct safexcel_cipher_ctx { unsigned int key_len; }; +struct safexcel_cipher_req { + bool needs_inv; +}; + static void safexcel_cipher_token(struct safexcel_cipher_ctx *ctx, struct crypto_async_request *async, struct safexcel_command_desc *cdesc, @@ -126,9 +131,9 @@ static int safexcel_context_control(struct safexcel_cipher_ctx *ctx, return 0; } -static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring, - struct crypto_async_request *async, - bool *should_complete, int *ret) +static int safexcel_handle_req_result(struct safexcel_crypto_priv *priv, int ring, + struct crypto_async_request *async, + bool *should_complete, int *ret) { struct skcipher_request *req = skcipher_request_cast(async); struct safexcel_result_desc *rdesc; @@ -265,7 +270,6 @@ static int safexcel_aes_send(struct crypto_async_request *async, spin_unlock_bh(&priv->ring[ring].egress_lock); request->req = &req->base; - ctx->base.handle_result = safexcel_handle_result; *commands = n_cdesc; *results = n_rdesc; @@ -341,8 +345,6 @@ static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv, ring = safexcel_select_ring(priv); ctx->base.ring = ring; - ctx->base.needs_inv = false; - ctx->base.send = safexcel_aes_send; spin_lock_bh(&priv->ring[ring].queue_lock); enq_ret = crypto_enqueue_request(&priv->ring[ring].queue, async); @@ -359,6 +361,26 @@ static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv, return ndesc; } +static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring, + struct crypto_async_request *async, + bool *should_complete, int *ret) +{ + struct skcipher_request *req = skcipher_request_cast(async); + struct safexcel_cipher_req *sreq = skcipher_request_ctx(req); + int err; + + if (sreq->needs_inv) { + sreq->needs_inv = false; + err = safexcel_handle_inv_result(priv, ring, async, + should_complete, ret); + } else { + err = safexcel_handle_req_result(priv, ring, async, + should_complete, ret); + } + + return err; +} + static int safexcel_cipher_send_inv(struct crypto_async_request *async, int ring, struct safexcel_request *request, int *commands, int *results) @@ -368,8 +390,6 @@ static int safexcel_cipher_send_inv(struct crypto_async_request *async, struct safexcel_crypto_priv *priv = ctx->priv; int ret; - ctx->base.handle_result = safexcel_handle_inv_result; - ret = safexcel_invalidate_cache(async, &ctx->base, priv, ctx->base.ctxr_dma, ring, request); if (unlikely(ret)) @@ -381,28 +401,46 @@ static int safexcel_cipher_send_inv(struct crypto_async_request *async, return 0; } +static int safexcel_send(struct crypto_async_request *async, + int ring, struct safexcel_request *request, + int *commands, int *results) +{ + struct skcipher_request *req = skcipher_request_cast(async); + struct safexcel_cipher_req *sreq = skcipher_request_ctx(req); + int ret; + + if (sreq->needs_inv) + ret = safexcel_cipher_send_inv(async, ring, request, + commands, results); + else + ret = safexcel_aes_send(async, ring, request, + commands, results); + return ret; +} + static int safexcel_cipher_exit_inv(struct crypto_tfm *tfm) { struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm); struct safexcel_crypto_priv *priv = ctx->priv; - struct skcipher_request req; + SKCIPHER_REQUEST_ON_STACK(req, __crypto_skcipher_cast(tfm)); + struct safexcel_cipher_req *sreq = skcipher_request_ctx(req); struct safexcel_inv_result result = {}; int ring = ctx->base.ring; - memset(&req, 0, sizeof(struct skcipher_request)); + memset(req, 0, sizeof(struct skcipher_request)); /* create invalidation request */ init_completion(&result.completion); - skcipher_request_set_callback(&req, CRYPTO_TFM_REQ_MAY_BACKLOG, - safexcel_inv_complete, &result); + skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + safexcel_inv_complete, &result); - skcipher_request_set_tfm(&req, __crypto_skcipher_cast(tfm)); - ctx = crypto_tfm_ctx(req.base.tfm); + skcipher_request_set_tfm(req, __crypto_skcipher_cast(tfm)); + ctx = crypto_tfm_ctx(req->base.tfm); ctx->base.exit_inv = true; - ctx->base.send = safexcel_cipher_send_inv; + sreq->needs_inv = true; spin_lock_bh(&priv->ring[ring].queue_lock); - crypto_enqueue_request(&priv->ring[ring].queue, &req.base); + crypto_enqueue_request(&priv->ring[ring].queue, &req->base); spin_unlock_bh(&priv->ring[ring].queue_lock); if (!priv->ring[ring].need_dequeue) @@ -424,19 +462,21 @@ static int safexcel_aes(struct skcipher_request *req, enum safexcel_cipher_direction dir, u32 mode) { struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(req->base.tfm); + struct safexcel_cipher_req *sreq = skcipher_request_ctx(req); struct safexcel_crypto_priv *priv = ctx->priv; int ret, ring; + sreq->needs_inv = false; ctx->direction = dir; ctx->mode = mode; if (ctx->base.ctxr) { - if (ctx->base.needs_inv) - ctx->base.send = safexcel_cipher_send_inv; + if (ctx->base.needs_inv) { + sreq->needs_inv = true; + ctx->base.needs_inv = false; + } } else { ctx->base.ring = safexcel_select_ring(priv); - ctx->base.send = safexcel_aes_send; - ctx->base.ctxr = dma_pool_zalloc(priv->context_pool, EIP197_GFP_FLAGS(req->base), &ctx->base.ctxr_dma); @@ -476,6 +516,11 @@ static int safexcel_skcipher_cra_init(struct crypto_tfm *tfm) alg.skcipher.base); ctx->priv = tmpl->priv; + ctx->base.send = safexcel_send; + ctx->base.handle_result = safexcel_handle_result; + + crypto_skcipher_set_reqsize(__crypto_skcipher_cast(tfm), + sizeof(struct safexcel_cipher_req)); return 0; } diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c index 74feb6227101..0c5a5820b06e 100644 --- a/drivers/crypto/inside-secure/safexcel_hash.c +++ b/drivers/crypto/inside-secure/safexcel_hash.c @@ -32,9 +32,10 @@ struct safexcel_ahash_req { bool last_req; bool finish; bool hmac; + bool needs_inv; u8 state_sz; /* expected sate size, only set once */ - u32 state[SHA256_DIGEST_SIZE / sizeof(u32)]; + u32 state[SHA256_DIGEST_SIZE / sizeof(u32)] __aligned(sizeof(u32)); u64 len; u64 processed; @@ -119,15 +120,15 @@ static void safexcel_context_control(struct safexcel_ahash_ctx *ctx, } } -static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring, - struct crypto_async_request *async, - bool *should_complete, int *ret) +static int safexcel_handle_req_result(struct safexcel_crypto_priv *priv, int ring, + struct crypto_async_request *async, + bool *should_complete, int *ret) { struct safexcel_result_desc *rdesc; struct ahash_request *areq = ahash_request_cast(async); struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq); struct safexcel_ahash_req *sreq = ahash_request_ctx(areq); - int cache_len, result_sz = sreq->state_sz; + int cache_len; *ret = 0; @@ -148,8 +149,8 @@ static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring, spin_unlock_bh(&priv->ring[ring].egress_lock); if (sreq->finish) - result_sz = crypto_ahash_digestsize(ahash); - memcpy(sreq->state, areq->result, result_sz); + memcpy(areq->result, sreq->state, + crypto_ahash_digestsize(ahash)); dma_unmap_sg(priv->dev, areq->src, sg_nents_for_len(areq->src, areq->nbytes), DMA_TO_DEVICE); @@ -165,9 +166,9 @@ static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring, return 1; } -static int safexcel_ahash_send(struct crypto_async_request *async, int ring, - struct safexcel_request *request, int *commands, - int *results) +static int safexcel_ahash_send_req(struct crypto_async_request *async, int ring, + struct safexcel_request *request, + int *commands, int *results) { struct ahash_request *areq = ahash_request_cast(async); struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq); @@ -273,7 +274,7 @@ send_command: /* Add the token */ safexcel_hash_token(first_cdesc, len, req->state_sz); - ctx->base.result_dma = dma_map_single(priv->dev, areq->result, + ctx->base.result_dma = dma_map_single(priv->dev, req->state, req->state_sz, DMA_FROM_DEVICE); if (dma_mapping_error(priv->dev, ctx->base.result_dma)) { ret = -EINVAL; @@ -292,7 +293,6 @@ send_command: req->processed += len; request->req = &areq->base; - ctx->base.handle_result = safexcel_handle_result; *commands = n_cdesc; *results = 1; @@ -374,8 +374,6 @@ static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv, ring = safexcel_select_ring(priv); ctx->base.ring = ring; - ctx->base.needs_inv = false; - ctx->base.send = safexcel_ahash_send; spin_lock_bh(&priv->ring[ring].queue_lock); enq_ret = crypto_enqueue_request(&priv->ring[ring].queue, async); @@ -392,6 +390,26 @@ static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv, return 1; } +static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring, + struct crypto_async_request *async, + bool *should_complete, int *ret) +{ + struct ahash_request *areq = ahash_request_cast(async); + struct safexcel_ahash_req *req = ahash_request_ctx(areq); + int err; + + if (req->needs_inv) { + req->needs_inv = false; + err = safexcel_handle_inv_result(priv, ring, async, + should_complete, ret); + } else { + err = safexcel_handle_req_result(priv, ring, async, + should_complete, ret); + } + + return err; +} + static int safexcel_ahash_send_inv(struct crypto_async_request *async, int ring, struct safexcel_request *request, int *commands, int *results) @@ -400,7 +418,6 @@ static int safexcel_ahash_send_inv(struct crypto_async_request *async, struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); int ret; - ctx->base.handle_result = safexcel_handle_inv_result; ret = safexcel_invalidate_cache(async, &ctx->base, ctx->priv, ctx->base.ctxr_dma, ring, request); if (unlikely(ret)) @@ -412,28 +429,46 @@ static int safexcel_ahash_send_inv(struct crypto_async_request *async, return 0; } +static int safexcel_ahash_send(struct crypto_async_request *async, + int ring, struct safexcel_request *request, + int *commands, int *results) +{ + struct ahash_request *areq = ahash_request_cast(async); + struct safexcel_ahash_req *req = ahash_request_ctx(areq); + int ret; + + if (req->needs_inv) + ret = safexcel_ahash_send_inv(async, ring, request, + commands, results); + else + ret = safexcel_ahash_send_req(async, ring, request, + commands, results); + return ret; +} + static int safexcel_ahash_exit_inv(struct crypto_tfm *tfm) { struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm); struct safexcel_crypto_priv *priv = ctx->priv; - struct ahash_request req; + AHASH_REQUEST_ON_STACK(req, __crypto_ahash_cast(tfm)); + struct safexcel_ahash_req *rctx = ahash_request_ctx(req); struct safexcel_inv_result result = {}; int ring = ctx->base.ring; - memset(&req, 0, sizeof(struct ahash_request)); + memset(req, 0, sizeof(struct ahash_request)); /* create invalidation request */ init_completion(&result.completion); - ahash_request_set_callback(&req, CRYPTO_TFM_REQ_MAY_BACKLOG, + ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, safexcel_inv_complete, &result); - ahash_request_set_tfm(&req, __crypto_ahash_cast(tfm)); - ctx = crypto_tfm_ctx(req.base.tfm); + ahash_request_set_tfm(req, __crypto_ahash_cast(tfm)); + ctx = crypto_tfm_ctx(req->base.tfm); ctx->base.exit_inv = true; - ctx->base.send = safexcel_ahash_send_inv; + rctx->needs_inv = true; spin_lock_bh(&priv->ring[ring].queue_lock); - crypto_enqueue_request(&priv->ring[ring].queue, &req.base); + crypto_enqueue_request(&priv->ring[ring].queue, &req->base); spin_unlock_bh(&priv->ring[ring].queue_lock); if (!priv->ring[ring].need_dequeue) @@ -481,14 +516,16 @@ static int safexcel_ahash_enqueue(struct ahash_request *areq) struct safexcel_crypto_priv *priv = ctx->priv; int ret, ring; - ctx->base.send = safexcel_ahash_send; + req->needs_inv = false; if (req->processed && ctx->digest == CONTEXT_CONTROL_DIGEST_PRECOMPUTED) ctx->base.needs_inv = safexcel_ahash_needs_inv_get(areq); if (ctx->base.ctxr) { - if (ctx->base.needs_inv) - ctx->base.send = safexcel_ahash_send_inv; + if (ctx->base.needs_inv) { + ctx->base.needs_inv = false; + req->needs_inv = true; + } } else { ctx->base.ring = safexcel_select_ring(priv); ctx->base.ctxr = dma_pool_zalloc(priv->context_pool, @@ -622,6 +659,8 @@ static int safexcel_ahash_cra_init(struct crypto_tfm *tfm) struct safexcel_alg_template, alg.ahash); ctx->priv = tmpl->priv; + ctx->base.send = safexcel_ahash_send; + ctx->base.handle_result = safexcel_handle_result; crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), sizeof(struct safexcel_ahash_req)); diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c index 48de52cf2ecc..662e709812cc 100644 --- a/drivers/crypto/n2_core.c +++ b/drivers/crypto/n2_core.c @@ -1625,6 +1625,7 @@ static int queue_cache_init(void) CWQ_ENTRY_SIZE, 0, NULL); if (!queue_cache[HV_NCS_QTYPE_CWQ - 1]) { kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]); + queue_cache[HV_NCS_QTYPE_MAU - 1] = NULL; return -ENOMEM; } return 0; @@ -1634,6 +1635,8 @@ static void queue_cache_destroy(void) { kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]); kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_CWQ - 1]); + queue_cache[HV_NCS_QTYPE_MAU - 1] = NULL; + queue_cache[HV_NCS_QTYPE_CWQ - 1] = NULL; } static long spu_queue_register_workfn(void *arg) diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c index ec8ac5c4dd84..055e2e8f985a 100644 --- a/drivers/firmware/efi/capsule-loader.c +++ b/drivers/firmware/efi/capsule-loader.c @@ -20,10 +20,6 @@ #define NO_FURTHER_WRITE_ACTION -1 -#ifndef phys_to_page -#define phys_to_page(x) pfn_to_page((x) >> PAGE_SHIFT) -#endif - /** * efi_free_all_buff_pages - free all previous allocated buffer pages * @cap_info: pointer to current instance of capsule_info structure @@ -35,7 +31,7 @@ static void efi_free_all_buff_pages(struct capsule_info *cap_info) { while (cap_info->index > 0) - __free_page(phys_to_page(cap_info->pages[--cap_info->index])); + __free_page(cap_info->pages[--cap_info->index]); cap_info->index = NO_FURTHER_WRITE_ACTION; } @@ -71,6 +67,14 @@ int __efi_capsule_setup_info(struct capsule_info *cap_info) cap_info->pages = temp_page; + temp_page = krealloc(cap_info->phys, + pages_needed * sizeof(phys_addr_t *), + GFP_KERNEL | __GFP_ZERO); + if (!temp_page) + return -ENOMEM; + + cap_info->phys = temp_page; + return 0; } @@ -105,9 +109,24 @@ int __weak efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff, **/ static ssize_t efi_capsule_submit_update(struct capsule_info *cap_info) { + bool do_vunmap = false; int ret; - ret = efi_capsule_update(&cap_info->header, cap_info->pages); + /* + * cap_info->capsule may have been assigned already by a quirk + * handler, so only overwrite it if it is NULL + */ + if (!cap_info->capsule) { + cap_info->capsule = vmap(cap_info->pages, cap_info->index, + VM_MAP, PAGE_KERNEL); + if (!cap_info->capsule) + return -ENOMEM; + do_vunmap = true; + } + + ret = efi_capsule_update(cap_info->capsule, cap_info->phys); + if (do_vunmap) + vunmap(cap_info->capsule); if (ret) { pr_err("capsule update failed\n"); return ret; @@ -165,10 +184,12 @@ static ssize_t efi_capsule_write(struct file *file, const char __user *buff, goto failed; } - cap_info->pages[cap_info->index++] = page_to_phys(page); + cap_info->pages[cap_info->index] = page; + cap_info->phys[cap_info->index] = page_to_phys(page); cap_info->page_bytes_remain = PAGE_SIZE; + cap_info->index++; } else { - page = phys_to_page(cap_info->pages[cap_info->index - 1]); + page = cap_info->pages[cap_info->index - 1]; } kbuff = kmap(page); @@ -252,6 +273,7 @@ static int efi_capsule_release(struct inode *inode, struct file *file) struct capsule_info *cap_info = file->private_data; kfree(cap_info->pages); + kfree(cap_info->phys); kfree(file->private_data); file->private_data = NULL; return 0; @@ -281,6 +303,13 @@ static int efi_capsule_open(struct inode *inode, struct file *file) return -ENOMEM; } + cap_info->phys = kzalloc(sizeof(void *), GFP_KERNEL); + if (!cap_info->phys) { + kfree(cap_info->pages); + kfree(cap_info); + return -ENOMEM; + } + file->private_data = cap_info; return 0; diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c index dfcf56ee3c61..76861a00bb92 100644 --- a/drivers/gpio/gpio-bcm-kona.c +++ b/drivers/gpio/gpio-bcm-kona.c @@ -522,6 +522,7 @@ static struct of_device_id const bcm_kona_gpio_of_match[] = { * category than their parents, so it won't report false recursion. */ static struct lock_class_key gpio_lock_class; +static struct lock_class_key gpio_request_class; static int bcm_kona_gpio_irq_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hwirq) @@ -531,7 +532,7 @@ static int bcm_kona_gpio_irq_map(struct irq_domain *d, unsigned int irq, ret = irq_set_chip_data(irq, d->host_data); if (ret < 0) return ret; - irq_set_lockdep_class(irq, &gpio_lock_class); + irq_set_lockdep_class(irq, &gpio_lock_class, &gpio_request_class); irq_set_chip_and_handler(irq, &bcm_gpio_irq_chip, handle_simple_irq); irq_set_noprobe(irq); diff --git a/drivers/gpio/gpio-brcmstb.c b/drivers/gpio/gpio-brcmstb.c index 545d43a587b7..bb4f8cf18bd9 100644 --- a/drivers/gpio/gpio-brcmstb.c +++ b/drivers/gpio/gpio-brcmstb.c @@ -327,6 +327,7 @@ static struct brcmstb_gpio_bank *brcmstb_gpio_hwirq_to_bank( * category than their parents, so it won't report false recursion. */ static struct lock_class_key brcmstb_gpio_irq_lock_class; +static struct lock_class_key brcmstb_gpio_irq_request_class; static int brcmstb_gpio_irq_map(struct irq_domain *d, unsigned int irq, @@ -346,7 +347,8 @@ static int brcmstb_gpio_irq_map(struct irq_domain *d, unsigned int irq, ret = irq_set_chip_data(irq, &bank->gc); if (ret < 0) return ret; - irq_set_lockdep_class(irq, &brcmstb_gpio_irq_lock_class); + irq_set_lockdep_class(irq, &brcmstb_gpio_irq_lock_class, + &brcmstb_gpio_irq_request_class); irq_set_chip_and_handler(irq, &priv->irq_chip, handle_level_irq); irq_set_noprobe(irq); return 0; diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c index 8db47f671708..02fa8fe2292a 100644 --- a/drivers/gpio/gpio-tegra.c +++ b/drivers/gpio/gpio-tegra.c @@ -565,6 +565,7 @@ static const struct dev_pm_ops tegra_gpio_pm_ops = { * than their parents, so it won't report false recursion. */ static struct lock_class_key gpio_lock_class; +static struct lock_class_key gpio_request_class; static int tegra_gpio_probe(struct platform_device *pdev) { @@ -670,7 +671,8 @@ static int tegra_gpio_probe(struct platform_device *pdev) bank = &tgi->bank_info[GPIO_BANK(gpio)]; - irq_set_lockdep_class(irq, &gpio_lock_class); + irq_set_lockdep_class(irq, &gpio_lock_class, + &gpio_request_class); irq_set_chip_data(irq, bank); irq_set_chip_and_handler(irq, &tgi->ic, handle_simple_irq); } diff --git a/drivers/gpio/gpio-xgene-sb.c b/drivers/gpio/gpio-xgene-sb.c index 2313af82fad3..acd59113e08b 100644 --- a/drivers/gpio/gpio-xgene-sb.c +++ b/drivers/gpio/gpio-xgene-sb.c @@ -139,7 +139,7 @@ static int xgene_gpio_sb_to_irq(struct gpio_chip *gc, u32 gpio) static int xgene_gpio_sb_domain_activate(struct irq_domain *d, struct irq_data *irq_data, - bool early) + bool reserve) { struct xgene_gpio_sb *priv = d->host_data; u32 gpio = HWIRQ_TO_GPIO(priv, irq_data->hwirq); diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index aad84a6306c4..44332b793718 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -73,7 +73,8 @@ LIST_HEAD(gpio_devices); static void gpiochip_free_hogs(struct gpio_chip *chip); static int gpiochip_add_irqchip(struct gpio_chip *gpiochip, - struct lock_class_key *key); + struct lock_class_key *lock_key, + struct lock_class_key *request_key); static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip); static int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gpiochip); static void gpiochip_irqchip_free_valid_mask(struct gpio_chip *gpiochip); @@ -1100,7 +1101,8 @@ static void gpiochip_setup_devs(void) } int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, - struct lock_class_key *key) + struct lock_class_key *lock_key, + struct lock_class_key *request_key) { unsigned long flags; int status = 0; @@ -1246,7 +1248,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, if (status) goto err_remove_from_list; - status = gpiochip_add_irqchip(chip, key); + status = gpiochip_add_irqchip(chip, lock_key, request_key); if (status) goto err_remove_chip; @@ -1632,7 +1634,7 @@ int gpiochip_irq_map(struct irq_domain *d, unsigned int irq, * This lock class tells lockdep that GPIO irqs are in a different * category than their parents, so it won't report false recursion. */ - irq_set_lockdep_class(irq, chip->irq.lock_key); + irq_set_lockdep_class(irq, chip->irq.lock_key, chip->irq.request_key); irq_set_chip_and_handler(irq, chip->irq.chip, chip->irq.handler); /* Chips that use nested thread handlers have them marked */ if (chip->irq.threaded) @@ -1712,10 +1714,12 @@ static int gpiochip_to_irq(struct gpio_chip *chip, unsigned offset) /** * gpiochip_add_irqchip() - adds an IRQ chip to a GPIO chip * @gpiochip: the GPIO chip to add the IRQ chip to - * @lock_key: lockdep class + * @lock_key: lockdep class for IRQ lock + * @request_key: lockdep class for IRQ request */ static int gpiochip_add_irqchip(struct gpio_chip *gpiochip, - struct lock_class_key *lock_key) + struct lock_class_key *lock_key, + struct lock_class_key *request_key) { struct irq_chip *irqchip = gpiochip->irq.chip; const struct irq_domain_ops *ops; @@ -1753,6 +1757,7 @@ static int gpiochip_add_irqchip(struct gpio_chip *gpiochip, gpiochip->to_irq = gpiochip_to_irq; gpiochip->irq.default_type = type; gpiochip->irq.lock_key = lock_key; + gpiochip->irq.request_key = request_key; if (gpiochip->irq.domain_ops) ops = gpiochip->irq.domain_ops; @@ -1850,7 +1855,8 @@ static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip) * @type: the default type for IRQs on this irqchip, pass IRQ_TYPE_NONE * to have the core avoid setting up any default type in the hardware. * @threaded: whether this irqchip uses a nested thread handler - * @lock_key: lockdep class + * @lock_key: lockdep class for IRQ lock + * @request_key: lockdep class for IRQ request * * This function closely associates a certain irqchip with a certain * gpiochip, providing an irq domain to translate the local IRQs to @@ -1872,7 +1878,8 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip, irq_flow_handler_t handler, unsigned int type, bool threaded, - struct lock_class_key *lock_key) + struct lock_class_key *lock_key, + struct lock_class_key *request_key) { struct device_node *of_node; @@ -1913,6 +1920,7 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip, gpiochip->irq.default_type = type; gpiochip->to_irq = gpiochip_to_irq; gpiochip->irq.lock_key = lock_key; + gpiochip->irq.request_key = request_key; gpiochip->irq.domain = irq_domain_add_simple(of_node, gpiochip->ngpio, first_irq, &gpiochip_domain_ops, gpiochip); @@ -1940,7 +1948,8 @@ EXPORT_SYMBOL_GPL(gpiochip_irqchip_add_key); #else /* CONFIG_GPIOLIB_IRQCHIP */ static inline int gpiochip_add_irqchip(struct gpio_chip *gpiochip, - struct lock_class_key *key) + struct lock_class_key *lock_key, + struct lock_class_key *request_key) { return 0; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h index a9782b1aba47..34daf895f848 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h @@ -1360,7 +1360,7 @@ void dpp1_cm_set_output_csc_adjustment( void dpp1_cm_set_output_csc_default( struct dpp *dpp_base, - const struct default_adjustment *default_adjust); + enum dc_color_space colorspace); void dpp1_cm_set_gamut_remap( struct dpp *dpp, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c index 40627c244bf5..ed1216b53465 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c @@ -225,14 +225,13 @@ void dpp1_cm_set_gamut_remap( void dpp1_cm_set_output_csc_default( struct dpp *dpp_base, - const struct default_adjustment *default_adjust) + enum dc_color_space colorspace) { struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base); uint32_t ocsc_mode = 0; - if (default_adjust != NULL) { - switch (default_adjust->out_color_space) { + switch (colorspace) { case COLOR_SPACE_SRGB: case COLOR_SPACE_2020_RGB_FULLRANGE: ocsc_mode = 0; @@ -253,7 +252,6 @@ void dpp1_cm_set_output_csc_default( case COLOR_SPACE_UNKNOWN: default: break; - } } REG_SET(CM_OCSC_CONTROL, 0, CM_OCSC_MODE, ocsc_mode); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 961ad5c3b454..05dc01e54531 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -2097,6 +2097,8 @@ static void program_csc_matrix(struct pipe_ctx *pipe_ctx, tbl_entry.color_space = color_space; //tbl_entry.regval = matrix; pipe_ctx->plane_res.dpp->funcs->opp_set_csc_adjustment(pipe_ctx->plane_res.dpp, &tbl_entry); + } else { + pipe_ctx->plane_res.dpp->funcs->opp_set_csc_default(pipe_ctx->plane_res.dpp, colorspace); } } static bool is_lower_pipe_tree_visible(struct pipe_ctx *pipe_ctx) diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h index 83a68460edcd..9420dfb94d39 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h @@ -64,7 +64,7 @@ struct dpp_funcs { void (*opp_set_csc_default)( struct dpp *dpp, - const struct default_adjustment *default_adjust); + enum dc_color_space colorspace); void (*opp_set_csc_adjustment)( struct dpp *dpp, diff --git a/drivers/gpu/drm/armada/armada_crtc.c b/drivers/gpu/drm/armada/armada_crtc.c index 2e065facdce7..a0f4d2a2a481 100644 --- a/drivers/gpu/drm/armada/armada_crtc.c +++ b/drivers/gpu/drm/armada/armada_crtc.c @@ -168,16 +168,23 @@ static void armada_drm_crtc_update(struct armada_crtc *dcrtc) void armada_drm_plane_calc_addrs(u32 *addrs, struct drm_framebuffer *fb, int x, int y) { + const struct drm_format_info *format = fb->format; + unsigned int num_planes = format->num_planes; u32 addr = drm_fb_obj(fb)->dev_addr; - int num_planes = fb->format->num_planes; int i; if (num_planes > 3) num_planes = 3; - for (i = 0; i < num_planes; i++) + addrs[0] = addr + fb->offsets[0] + y * fb->pitches[0] + + x * format->cpp[0]; + + y /= format->vsub; + x /= format->hsub; + + for (i = 1; i < num_planes; i++) addrs[i] = addr + fb->offsets[i] + y * fb->pitches[i] + - x * fb->format->cpp[i]; + x * format->cpp[i]; for (; i < 3; i++) addrs[i] = 0; } @@ -744,15 +751,14 @@ void armada_drm_crtc_plane_disable(struct armada_crtc *dcrtc, if (plane->fb) drm_framebuffer_put(plane->fb); - /* Power down the Y/U/V FIFOs */ - sram_para1 = CFG_PDWN16x66 | CFG_PDWN32x66; - /* Power down most RAMs and FIFOs if this is the primary plane */ if (plane->type == DRM_PLANE_TYPE_PRIMARY) { - sram_para1 |= CFG_PDWN256x32 | CFG_PDWN256x24 | CFG_PDWN256x8 | - CFG_PDWN32x32 | CFG_PDWN64x66; + sram_para1 = CFG_PDWN256x32 | CFG_PDWN256x24 | CFG_PDWN256x8 | + CFG_PDWN32x32 | CFG_PDWN64x66; dma_ctrl0_mask = CFG_GRA_ENA; } else { + /* Power down the Y/U/V FIFOs */ + sram_para1 = CFG_PDWN16x66 | CFG_PDWN32x66; dma_ctrl0_mask = CFG_DMA_ENA; } @@ -1225,17 +1231,13 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev, ret = devm_request_irq(dev, irq, armada_drm_irq, 0, "armada_drm_crtc", dcrtc); - if (ret < 0) { - kfree(dcrtc); - return ret; - } + if (ret < 0) + goto err_crtc; if (dcrtc->variant->init) { ret = dcrtc->variant->init(dcrtc, dev); - if (ret) { - kfree(dcrtc); - return ret; - } + if (ret) + goto err_crtc; } /* Ensure AXI pipeline is enabled */ @@ -1246,13 +1248,15 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev, dcrtc->crtc.port = port; primary = kzalloc(sizeof(*primary), GFP_KERNEL); - if (!primary) - return -ENOMEM; + if (!primary) { + ret = -ENOMEM; + goto err_crtc; + } ret = armada_drm_plane_init(primary); if (ret) { kfree(primary); - return ret; + goto err_crtc; } ret = drm_universal_plane_init(drm, &primary->base, 0, @@ -1263,7 +1267,7 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev, DRM_PLANE_TYPE_PRIMARY, NULL); if (ret) { kfree(primary); - return ret; + goto err_crtc; } ret = drm_crtc_init_with_planes(drm, &dcrtc->crtc, &primary->base, NULL, @@ -1282,6 +1286,9 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev, err_crtc_init: primary->base.funcs->destroy(&primary->base); +err_crtc: + kfree(dcrtc); + return ret; } diff --git a/drivers/gpu/drm/armada/armada_crtc.h b/drivers/gpu/drm/armada/armada_crtc.h index bab11f483575..bfd3514fbe9b 100644 --- a/drivers/gpu/drm/armada/armada_crtc.h +++ b/drivers/gpu/drm/armada/armada_crtc.h @@ -42,6 +42,8 @@ struct armada_plane_work { }; struct armada_plane_state { + u16 src_x; + u16 src_y; u32 src_hw; u32 dst_hw; u32 dst_yx; diff --git a/drivers/gpu/drm/armada/armada_overlay.c b/drivers/gpu/drm/armada/armada_overlay.c index b411b608821a..aba947696178 100644 --- a/drivers/gpu/drm/armada/armada_overlay.c +++ b/drivers/gpu/drm/armada/armada_overlay.c @@ -99,6 +99,7 @@ armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, { struct armada_ovl_plane *dplane = drm_to_armada_ovl_plane(plane); struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc); + const struct drm_format_info *format; struct drm_rect src = { .x1 = src_x, .y1 = src_y, @@ -117,7 +118,7 @@ armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, }; uint32_t val, ctrl0; unsigned idx = 0; - bool visible; + bool visible, fb_changed; int ret; trace_armada_ovl_plane_update(plane, crtc, fb, @@ -138,6 +139,18 @@ armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, if (!visible) ctrl0 &= ~CFG_DMA_ENA; + /* + * Shifting a YUV packed format image by one pixel causes the U/V + * planes to swap. Compensate for it by also toggling the UV swap. + */ + format = fb->format; + if (format->num_planes == 1 && src.x1 >> 16 & (format->hsub - 1)) + ctrl0 ^= CFG_DMA_MOD(CFG_SWAPUV); + + fb_changed = plane->fb != fb || + dplane->base.state.src_x != src.x1 >> 16 || + dplane->base.state.src_y != src.y1 >> 16; + if (!dcrtc->plane) { dcrtc->plane = plane; armada_ovl_update_attr(&dplane->prop, dcrtc); @@ -145,7 +158,7 @@ armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, /* FIXME: overlay on an interlaced display */ /* Just updating the position/size? */ - if (plane->fb == fb && dplane->base.state.ctrl0 == ctrl0) { + if (!fb_changed && dplane->base.state.ctrl0 == ctrl0) { val = (drm_rect_height(&src) & 0xffff0000) | drm_rect_width(&src) >> 16; dplane->base.state.src_hw = val; @@ -169,9 +182,8 @@ armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, if (armada_drm_plane_work_wait(&dplane->base, HZ / 25) == 0) armada_drm_plane_work_cancel(dcrtc, &dplane->base); - if (plane->fb != fb) { - u32 addrs[3], pixel_format; - int num_planes, hsub; + if (fb_changed) { + u32 addrs[3]; /* * Take a reference on the new framebuffer - we want to @@ -182,23 +194,11 @@ armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, if (plane->fb) armada_ovl_retire_fb(dplane, plane->fb); - src_y = src.y1 >> 16; - src_x = src.x1 >> 16; + dplane->base.state.src_y = src_y = src.y1 >> 16; + dplane->base.state.src_x = src_x = src.x1 >> 16; armada_drm_plane_calc_addrs(addrs, fb, src_x, src_y); - pixel_format = fb->format->format; - hsub = drm_format_horz_chroma_subsampling(pixel_format); - num_planes = fb->format->num_planes; - - /* - * Annoyingly, shifting a YUYV-format image by one pixel - * causes the U/V planes to toggle. Toggle the UV swap. - * (Unfortunately, this causes momentary colour flickering.) - */ - if (src_x & (hsub - 1) && num_planes == 1) - ctrl0 ^= CFG_DMA_MOD(CFG_SWAPUV); - armada_reg_queue_set(dplane->vbl.regs, idx, addrs[0], LCD_SPU_DMA_START_ADDR_Y0); armada_reg_queue_set(dplane->vbl.regs, idx, addrs[1], diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 54b5d4c582b6..e143004e66d5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2368,6 +2368,9 @@ struct drm_i915_private { */ struct workqueue_struct *wq; + /* ordered wq for modesets */ + struct workqueue_struct *modeset_wq; + /* Display functions */ struct drm_i915_display_funcs display; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 3866c49bc390..333f40bc03bb 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6977,6 +6977,7 @@ enum { #define RESET_PCH_HANDSHAKE_ENABLE (1<<4) #define GEN8_CHICKEN_DCPR_1 _MMIO(0x46430) +#define SKL_SELECT_ALTERNATE_DC_EXIT (1<<30) #define MASK_WAKEMEM (1<<13) #define SKL_DFSM _MMIO(0x51000) @@ -8522,6 +8523,7 @@ enum skl_power_gate { #define BXT_CDCLK_CD2X_DIV_SEL_2 (2<<22) #define BXT_CDCLK_CD2X_DIV_SEL_4 (3<<22) #define BXT_CDCLK_CD2X_PIPE(pipe) ((pipe)<<20) +#define CDCLK_DIVMUX_CD_OVERRIDE (1<<19) #define BXT_CDCLK_CD2X_PIPE_NONE BXT_CDCLK_CD2X_PIPE(3) #define BXT_CDCLK_SSA_PRECHARGE_ENABLE (1<<16) #define CDCLK_FREQ_DECIMAL_MASK (0x7ff) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index b2a6d62b71c0..60cf4e58389a 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -860,16 +860,10 @@ static void skl_set_preferred_cdclk_vco(struct drm_i915_private *dev_priv, static void skl_dpll0_enable(struct drm_i915_private *dev_priv, int vco) { - int min_cdclk = skl_calc_cdclk(0, vco); u32 val; WARN_ON(vco != 8100000 && vco != 8640000); - /* select the minimum CDCLK before enabling DPLL 0 */ - val = CDCLK_FREQ_337_308 | skl_cdclk_decimal(min_cdclk); - I915_WRITE(CDCLK_CTL, val); - POSTING_READ(CDCLK_CTL); - /* * We always enable DPLL0 with the lowest link rate possible, but still * taking into account the VCO required to operate the eDP panel at the @@ -923,7 +917,7 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, { int cdclk = cdclk_state->cdclk; int vco = cdclk_state->vco; - u32 freq_select, pcu_ack; + u32 freq_select, pcu_ack, cdclk_ctl; int ret; WARN_ON((cdclk == 24000) != (vco == 0)); @@ -940,7 +934,7 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, return; } - /* set CDCLK_CTL */ + /* Choose frequency for this cdclk */ switch (cdclk) { case 450000: case 432000: @@ -968,10 +962,33 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, dev_priv->cdclk.hw.vco != vco) skl_dpll0_disable(dev_priv); + cdclk_ctl = I915_READ(CDCLK_CTL); + + if (dev_priv->cdclk.hw.vco != vco) { + /* Wa Display #1183: skl,kbl,cfl */ + cdclk_ctl &= ~(CDCLK_FREQ_SEL_MASK | CDCLK_FREQ_DECIMAL_MASK); + cdclk_ctl |= freq_select | skl_cdclk_decimal(cdclk); + I915_WRITE(CDCLK_CTL, cdclk_ctl); + } + + /* Wa Display #1183: skl,kbl,cfl */ + cdclk_ctl |= CDCLK_DIVMUX_CD_OVERRIDE; + I915_WRITE(CDCLK_CTL, cdclk_ctl); + POSTING_READ(CDCLK_CTL); + if (dev_priv->cdclk.hw.vco != vco) skl_dpll0_enable(dev_priv, vco); - I915_WRITE(CDCLK_CTL, freq_select | skl_cdclk_decimal(cdclk)); + /* Wa Display #1183: skl,kbl,cfl */ + cdclk_ctl &= ~(CDCLK_FREQ_SEL_MASK | CDCLK_FREQ_DECIMAL_MASK); + I915_WRITE(CDCLK_CTL, cdclk_ctl); + + cdclk_ctl |= freq_select | skl_cdclk_decimal(cdclk); + I915_WRITE(CDCLK_CTL, cdclk_ctl); + + /* Wa Display #1183: skl,kbl,cfl */ + cdclk_ctl &= ~CDCLK_DIVMUX_CD_OVERRIDE; + I915_WRITE(CDCLK_CTL, cdclk_ctl); POSTING_READ(CDCLK_CTL); /* inform PCU of the change */ diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 30cf273d57aa..123585eeb87d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12544,11 +12544,15 @@ static int intel_atomic_commit(struct drm_device *dev, INIT_WORK(&state->commit_work, intel_atomic_commit_work); i915_sw_fence_commit(&intel_state->commit_ready); - if (nonblock) + if (nonblock && intel_state->modeset) { + queue_work(dev_priv->modeset_wq, &state->commit_work); + } else if (nonblock) { queue_work(system_unbound_wq, &state->commit_work); - else + } else { + if (intel_state->modeset) + flush_workqueue(dev_priv->modeset_wq); intel_atomic_commit_tail(state); - + } return 0; } @@ -14462,6 +14466,8 @@ int intel_modeset_init(struct drm_device *dev) enum pipe pipe; struct intel_crtc *crtc; + dev_priv->modeset_wq = alloc_ordered_workqueue("i915_modeset", 0); + drm_mode_config_init(dev); dev->mode_config.min_width = 0; @@ -15270,6 +15276,8 @@ void intel_modeset_cleanup(struct drm_device *dev) intel_cleanup_gt_powersave(dev_priv); intel_teardown_gmbus(dev_priv); + + destroy_workqueue(dev_priv->modeset_wq); } void intel_connector_attach_encoder(struct intel_connector *connector, diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 6e3b430fccdc..55ea5eb3b7df 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -590,7 +590,7 @@ static void hsw_psr_disable(struct intel_dp *intel_dp, struct drm_i915_private *dev_priv = to_i915(dev); if (dev_priv->psr.active) { - i915_reg_t psr_ctl; + i915_reg_t psr_status; u32 psr_status_mask; if (dev_priv->psr.aux_frame_sync) @@ -599,24 +599,24 @@ static void hsw_psr_disable(struct intel_dp *intel_dp, 0); if (dev_priv->psr.psr2_support) { - psr_ctl = EDP_PSR2_CTL; + psr_status = EDP_PSR2_STATUS_CTL; psr_status_mask = EDP_PSR2_STATUS_STATE_MASK; - I915_WRITE(psr_ctl, - I915_READ(psr_ctl) & + I915_WRITE(EDP_PSR2_CTL, + I915_READ(EDP_PSR2_CTL) & ~(EDP_PSR2_ENABLE | EDP_SU_TRACK_ENABLE)); } else { - psr_ctl = EDP_PSR_STATUS_CTL; + psr_status = EDP_PSR_STATUS_CTL; psr_status_mask = EDP_PSR_STATUS_STATE_MASK; - I915_WRITE(psr_ctl, - I915_READ(psr_ctl) & ~EDP_PSR_ENABLE); + I915_WRITE(EDP_PSR_CTL, + I915_READ(EDP_PSR_CTL) & ~EDP_PSR_ENABLE); } /* Wait till PSR is idle */ if (intel_wait_for_register(dev_priv, - psr_ctl, psr_status_mask, 0, + psr_status, psr_status_mask, 0, 2000)) DRM_ERROR("Timed out waiting for PSR Idle State\n"); diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 8af286c63d3b..7e115f3927f6 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -598,6 +598,11 @@ void gen9_enable_dc5(struct drm_i915_private *dev_priv) DRM_DEBUG_KMS("Enabling DC5\n"); + /* Wa Display #1183: skl,kbl,cfl */ + if (IS_GEN9_BC(dev_priv)) + I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) | + SKL_SELECT_ALTERNATE_DC_EXIT); + gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC5); } @@ -625,6 +630,11 @@ void skl_disable_dc6(struct drm_i915_private *dev_priv) { DRM_DEBUG_KMS("Disabling DC6\n"); + /* Wa Display #1183: skl,kbl,cfl */ + if (IS_GEN9_BC(dev_priv)) + I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) | + SKL_SELECT_ALTERNATE_DC_EXIT); + gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); } @@ -1786,6 +1796,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, GLK_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ BIT_ULL(POWER_DOMAIN_MODESET) | \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_GMBUS) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define CNL_DISPLAY_POWERWELL_2_POWER_DOMAINS ( \ diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c b/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c index e626eddf24d5..23db74ae1826 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c @@ -78,6 +78,8 @@ static void hdmi_cec_received_msg(struct hdmi_core_data *core) /* then read the message */ msg.len = cnt & 0xf; + if (msg.len > CEC_MAX_MSG_SIZE - 2) + msg.len = CEC_MAX_MSG_SIZE - 2; msg.msg[0] = hdmi_read_reg(core->base, HDMI_CEC_RX_CMD_HEADER); msg.msg[1] = hdmi_read_reg(core->base, @@ -104,26 +106,6 @@ static void hdmi_cec_received_msg(struct hdmi_core_data *core) } } -static void hdmi_cec_transmit_fifo_empty(struct hdmi_core_data *core, u32 stat1) -{ - if (stat1 & 2) { - u32 dbg3 = hdmi_read_reg(core->base, HDMI_CEC_DBG_3); - - cec_transmit_done(core->adap, - CEC_TX_STATUS_NACK | - CEC_TX_STATUS_MAX_RETRIES, - 0, (dbg3 >> 4) & 7, 0, 0); - } else if (stat1 & 1) { - cec_transmit_done(core->adap, - CEC_TX_STATUS_ARB_LOST | - CEC_TX_STATUS_MAX_RETRIES, - 0, 0, 0, 0); - } else if (stat1 == 0) { - cec_transmit_done(core->adap, CEC_TX_STATUS_OK, - 0, 0, 0, 0); - } -} - void hdmi4_cec_irq(struct hdmi_core_data *core) { u32 stat0 = hdmi_read_reg(core->base, HDMI_CEC_INT_STATUS_0); @@ -132,27 +114,21 @@ void hdmi4_cec_irq(struct hdmi_core_data *core) hdmi_write_reg(core->base, HDMI_CEC_INT_STATUS_0, stat0); hdmi_write_reg(core->base, HDMI_CEC_INT_STATUS_1, stat1); - if (stat0 & 0x40) + if (stat0 & 0x20) { + cec_transmit_done(core->adap, CEC_TX_STATUS_OK, + 0, 0, 0, 0); REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7); - else if (stat0 & 0x24) - hdmi_cec_transmit_fifo_empty(core, stat1); - if (stat1 & 2) { + } else if (stat1 & 0x02) { u32 dbg3 = hdmi_read_reg(core->base, HDMI_CEC_DBG_3); cec_transmit_done(core->adap, CEC_TX_STATUS_NACK | CEC_TX_STATUS_MAX_RETRIES, 0, (dbg3 >> 4) & 7, 0, 0); - } else if (stat1 & 1) { - cec_transmit_done(core->adap, - CEC_TX_STATUS_ARB_LOST | - CEC_TX_STATUS_MAX_RETRIES, - 0, 0, 0, 0); + REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7); } if (stat0 & 0x02) hdmi_cec_received_msg(core); - if (stat1 & 0x3) - REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7); } static bool hdmi_cec_clear_tx_fifo(struct cec_adapter *adap) @@ -231,18 +207,14 @@ static int hdmi_cec_adap_enable(struct cec_adapter *adap, bool enable) /* * Enable CEC interrupts: * Transmit Buffer Full/Empty Change event - * Transmitter FIFO Empty event * Receiver FIFO Not Empty event */ - hdmi_write_reg(core->base, HDMI_CEC_INT_ENABLE_0, 0x26); + hdmi_write_reg(core->base, HDMI_CEC_INT_ENABLE_0, 0x22); /* * Enable CEC interrupts: - * RX FIFO Overrun Error event - * Short Pulse Detected event * Frame Retransmit Count Exceeded event - * Start Bit Irregularity event */ - hdmi_write_reg(core->base, HDMI_CEC_INT_ENABLE_1, 0x0f); + hdmi_write_reg(core->base, HDMI_CEC_INT_ENABLE_1, 0x02); /* cec calibration enable (self clearing) */ hdmi_write_reg(core->base, HDMI_CEC_SETUP, 0x03); diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index b5ba6441489f..5d252fb27a82 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -1007,6 +1007,8 @@ int ttm_page_alloc_init(struct ttm_mem_global *glob, unsigned max_pages) pr_info("Initializing pool allocator\n"); _manager = kzalloc(sizeof(*_manager), GFP_KERNEL); + if (!_manager) + return -ENOMEM; ttm_page_pool_init_locked(&_manager->wc_pool, GFP_HIGHUSER, "wc", 0); diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index f3fcb836a1f9..0c3f608131cf 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -551,7 +551,7 @@ static int hid_parser_main(struct hid_parser *parser, struct hid_item *item) ret = hid_add_field(parser, HID_FEATURE_REPORT, data); break; default: - hid_err(parser->device, "unknown main item tag 0x%x\n", item->tag); + hid_warn(parser->device, "unknown main item tag 0x%x\n", item->tag); ret = 0; } diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c index 68cdc962265b..271f31461da4 100644 --- a/drivers/hid/hid-cp2112.c +++ b/drivers/hid/hid-cp2112.c @@ -696,8 +696,16 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr, (u8 *)&word, 2); break; case I2C_SMBUS_I2C_BLOCK_DATA: - size = I2C_SMBUS_BLOCK_DATA; - /* fallthrough */ + if (read_write == I2C_SMBUS_READ) { + read_length = data->block[0]; + count = cp2112_write_read_req(buf, addr, read_length, + command, NULL, 0); + } else { + count = cp2112_write_req(buf, addr, command, + data->block + 1, + data->block[0]); + } + break; case I2C_SMBUS_BLOCK_DATA: if (I2C_SMBUS_READ == read_write) { count = cp2112_write_read_req(buf, addr, @@ -785,6 +793,9 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr, case I2C_SMBUS_WORD_DATA: data->word = le16_to_cpup((__le16 *)buf); break; + case I2C_SMBUS_I2C_BLOCK_DATA: + memcpy(data->block + 1, buf, read_length); + break; case I2C_SMBUS_BLOCK_DATA: if (read_length > I2C_SMBUS_BLOCK_MAX) { ret = -EPROTO; diff --git a/drivers/hid/hid-holtekff.c b/drivers/hid/hid-holtekff.c index 9325545fc3ae..edc0f64bb584 100644 --- a/drivers/hid/hid-holtekff.c +++ b/drivers/hid/hid-holtekff.c @@ -32,10 +32,6 @@ #ifdef CONFIG_HOLTEK_FF -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Anssi Hannula <anssi.hannula@iki.fi>"); -MODULE_DESCRIPTION("Force feedback support for Holtek On Line Grip based devices"); - /* * These commands and parameters are currently known: * @@ -223,3 +219,7 @@ static struct hid_driver holtek_driver = { .probe = holtek_probe, }; module_hid_driver(holtek_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Anssi Hannula <anssi.hannula@iki.fi>"); +MODULE_DESCRIPTION("Force feedback support for Holtek On Line Grip based devices"); diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 76ed9a216f10..610223f0e945 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -1378,6 +1378,8 @@ void vmbus_device_unregister(struct hv_device *device_obj) pr_debug("child device %s unregistered\n", dev_name(&device_obj->device)); + kset_unregister(device_obj->channels_kset); + /* * Kick off the process of unregistering the device. * This will call vmbus_remove() and eventually vmbus_device_release() diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index a1d687a664f8..66f0268f37a6 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -314,7 +314,7 @@ static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map, } #endif -struct ib_device *__ib_device_get_by_index(u32 ifindex); +struct ib_device *ib_device_get_by_index(u32 ifindex); /* RDMA device netlink */ void nldev_init(void); void nldev_exit(void); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 30914f3baa5f..465520627e4b 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -134,7 +134,7 @@ static int ib_device_check_mandatory(struct ib_device *device) return 0; } -struct ib_device *__ib_device_get_by_index(u32 index) +static struct ib_device *__ib_device_get_by_index(u32 index) { struct ib_device *device; @@ -145,6 +145,22 @@ struct ib_device *__ib_device_get_by_index(u32 index) return NULL; } +/* + * Caller is responsible to return refrerence count by calling put_device() + */ +struct ib_device *ib_device_get_by_index(u32 index) +{ + struct ib_device *device; + + down_read(&lists_rwsem); + device = __ib_device_get_by_index(index); + if (device) + get_device(&device->dev); + + up_read(&lists_rwsem); + return device; +} + static struct ib_device *__ib_device_get_by_name(const char *name) { struct ib_device *device; diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 9a05245a1acf..0dcd1aa6f683 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -142,27 +142,34 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); - device = __ib_device_get_by_index(index); + device = ib_device_get_by_index(index); if (!device) return -EINVAL; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return -ENOMEM; + if (!msg) { + err = -ENOMEM; + goto err; + } nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET), 0, 0); err = fill_dev_info(msg, device); - if (err) { - nlmsg_free(msg); - return err; - } + if (err) + goto err_free; nlmsg_end(msg, nlh); + put_device(&device->dev); return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); + +err_free: + nlmsg_free(msg); +err: + put_device(&device->dev); + return err; } static int _nldev_get_dumpit(struct ib_device *device, @@ -220,31 +227,40 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); - device = __ib_device_get_by_index(index); + device = ib_device_get_by_index(index); if (!device) return -EINVAL; port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); - if (!rdma_is_port_valid(device, port)) - return -EINVAL; + if (!rdma_is_port_valid(device, port)) { + err = -EINVAL; + goto err; + } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return -ENOMEM; + if (!msg) { + err = -ENOMEM; + goto err; + } nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET), 0, 0); err = fill_port_info(msg, device, port); - if (err) { - nlmsg_free(msg); - return err; - } + if (err) + goto err_free; nlmsg_end(msg, nlh); + put_device(&device->dev); return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); + +err_free: + nlmsg_free(msg); +err: + put_device(&device->dev); + return err; } static int nldev_port_get_dumpit(struct sk_buff *skb, @@ -265,7 +281,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb, return -EINVAL; ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); - device = __ib_device_get_by_index(ifindex); + device = ib_device_get_by_index(ifindex); if (!device) return -EINVAL; @@ -299,7 +315,9 @@ static int nldev_port_get_dumpit(struct sk_buff *skb, nlmsg_end(skb, nlh); } -out: cb->args[0] = idx; +out: + put_device(&device->dev); + cb->args[0] = idx; return skb->len; } diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 313bfb9ccb71..4975f3e6596e 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -642,7 +642,6 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, goto err_free_mr; mr->max_pages = max_num_sg; - err = mlx4_mr_enable(dev->dev, &mr->mmr); if (err) goto err_free_pl; @@ -653,6 +652,7 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, return &mr->ibmr; err_free_pl: + mr->ibmr.device = pd->device; mlx4_free_priv_pages(mr); err_free_mr: (void) mlx4_mr_free(dev->dev, &mr->mmr); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 12b7f911f0e5..8880351df179 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -902,8 +902,8 @@ static int path_rec_start(struct net_device *dev, return 0; } -static void neigh_add_path(struct sk_buff *skb, u8 *daddr, - struct net_device *dev) +static struct ipoib_neigh *neigh_add_path(struct sk_buff *skb, u8 *daddr, + struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); struct rdma_netdev *rn = netdev_priv(dev); @@ -917,7 +917,15 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr, spin_unlock_irqrestore(&priv->lock, flags); ++dev->stats.tx_dropped; dev_kfree_skb_any(skb); - return; + return NULL; + } + + /* To avoid race condition, make sure that the + * neigh will be added only once. + */ + if (unlikely(!list_empty(&neigh->list))) { + spin_unlock_irqrestore(&priv->lock, flags); + return neigh; } path = __path_find(dev, daddr + 4); @@ -956,7 +964,7 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr, path->ah->last_send = rn->send(dev, skb, path->ah->ah, IPOIB_QPN(daddr)); ipoib_neigh_put(neigh); - return; + return NULL; } } else { neigh->ah = NULL; @@ -973,7 +981,7 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr, spin_unlock_irqrestore(&priv->lock, flags); ipoib_neigh_put(neigh); - return; + return NULL; err_path: ipoib_neigh_free(neigh); @@ -983,6 +991,8 @@ err_drop: spin_unlock_irqrestore(&priv->lock, flags); ipoib_neigh_put(neigh); + + return NULL; } static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, @@ -1091,8 +1101,9 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) case htons(ETH_P_TIPC): neigh = ipoib_neigh_get(dev, phdr->hwaddr); if (unlikely(!neigh)) { - neigh_add_path(skb, phdr->hwaddr, dev); - return NETDEV_TX_OK; + neigh = neigh_add_path(skb, phdr->hwaddr, dev); + if (likely(!neigh)) + return NETDEV_TX_OK; } break; case htons(ETH_P_ARP): diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 93e149efc1f5..9b3f47ae2016 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -816,7 +816,10 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb) spin_lock_irqsave(&priv->lock, flags); if (!neigh) { neigh = ipoib_neigh_alloc(daddr, dev); - if (neigh) { + /* Make sure that the neigh will be added only + * once to mcast list. + */ + if (neigh && list_empty(&neigh->list)) { kref_get(&mcast->ah->ref); neigh->ah = mcast->ah; list_add_tail(&neigh->list, &mcast->neigh_list); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 8a1bd354b1cc..bfa576aa9f03 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1013,8 +1013,7 @@ static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp) return -ENOMEM; attr->qp_state = IB_QPS_INIT; - attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_WRITE; + attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE; attr->port_num = ch->sport->port; attr->pkey_index = 0; @@ -2078,7 +2077,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, goto destroy_ib; } - guid = (__be16 *)¶m->primary_path->sgid.global.interface_id; + guid = (__be16 *)¶m->primary_path->dgid.global.interface_id; snprintf(ch->ini_guid, sizeof(ch->ini_guid), "%04x:%04x:%04x:%04x", be16_to_cpu(guid[0]), be16_to_cpu(guid[1]), be16_to_cpu(guid[2]), be16_to_cpu(guid[3])); diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c index 3d8ff09eba57..c868a878c84f 100644 --- a/drivers/input/joystick/analog.c +++ b/drivers/input/joystick/analog.c @@ -163,7 +163,7 @@ static unsigned int get_time_pit(void) #define GET_TIME(x) do { x = (unsigned int)rdtsc(); } while (0) #define DELTA(x,y) ((y)-(x)) #define TIME_NAME "TSC" -#elif defined(__alpha__) || defined(CONFIG_MN10300) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_TILE) +#elif defined(__alpha__) || defined(CONFIG_MN10300) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_RISCV) || defined(CONFIG_TILE) #define GET_TIME(x) do { x = get_cycles(); } while (0) #define DELTA(x,y) ((y)-(x)) #define TIME_NAME "get_cycles" diff --git a/drivers/input/misc/ims-pcu.c b/drivers/input/misc/ims-pcu.c index ae473123583b..3d51175c4d72 100644 --- a/drivers/input/misc/ims-pcu.c +++ b/drivers/input/misc/ims-pcu.c @@ -1651,7 +1651,7 @@ ims_pcu_get_cdc_union_desc(struct usb_interface *intf) return union_desc; dev_err(&intf->dev, - "Union descriptor to short (%d vs %zd\n)", + "Union descriptor too short (%d vs %zd)\n", union_desc->bLength, sizeof(*union_desc)); return NULL; } diff --git a/drivers/input/misc/xen-kbdfront.c b/drivers/input/misc/xen-kbdfront.c index 6bf56bb5f8d9..d91f3b1c5375 100644 --- a/drivers/input/misc/xen-kbdfront.c +++ b/drivers/input/misc/xen-kbdfront.c @@ -326,8 +326,6 @@ static int xenkbd_probe(struct xenbus_device *dev, 0, width, 0, 0); input_set_abs_params(mtouch, ABS_MT_POSITION_Y, 0, height, 0, 0); - input_set_abs_params(mtouch, ABS_MT_PRESSURE, - 0, 255, 0, 0); ret = input_mt_init_slots(mtouch, num_cont, INPUT_MT_DIRECT); if (ret) { diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index b84cd978fce2..a4aaa748e987 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -1613,7 +1613,7 @@ static int elantech_set_properties(struct elantech_data *etd) case 5: etd->hw_version = 3; break; - case 6 ... 14: + case 6 ... 15: etd->hw_version = 4; break; default: diff --git a/drivers/input/touchscreen/elants_i2c.c b/drivers/input/touchscreen/elants_i2c.c index e102d7764bc2..a458e5ec9e41 100644 --- a/drivers/input/touchscreen/elants_i2c.c +++ b/drivers/input/touchscreen/elants_i2c.c @@ -27,6 +27,7 @@ #include <linux/module.h> #include <linux/input.h> #include <linux/interrupt.h> +#include <linux/irq.h> #include <linux/platform_device.h> #include <linux/async.h> #include <linux/i2c.h> @@ -1261,10 +1262,13 @@ static int elants_i2c_probe(struct i2c_client *client, } /* - * Systems using device tree should set up interrupt via DTS, - * the rest will use the default falling edge interrupts. + * Platform code (ACPI, DTS) should normally set up interrupt + * for us, but in case it did not let's fall back to using falling + * edge to be compatible with older Chromebooks. */ - irqflags = client->dev.of_node ? 0 : IRQF_TRIGGER_FALLING; + irqflags = irq_get_trigger_type(client->irq); + if (!irqflags) + irqflags = IRQF_TRIGGER_FALLING; error = devm_request_threaded_irq(&client->dev, client->irq, NULL, elants_i2c_irq, diff --git a/drivers/input/touchscreen/hideep.c b/drivers/input/touchscreen/hideep.c index fc080a7c2e1f..f1cd4dd9a4a3 100644 --- a/drivers/input/touchscreen/hideep.c +++ b/drivers/input/touchscreen/hideep.c @@ -10,8 +10,7 @@ #include <linux/of.h> #include <linux/firmware.h> #include <linux/delay.h> -#include <linux/gpio.h> -#include <linux/gpio/machine.h> +#include <linux/gpio/consumer.h> #include <linux/i2c.h> #include <linux/acpi.h> #include <linux/interrupt.h> diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 7d5eb004091d..97baf88d9505 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -4184,7 +4184,7 @@ static void amd_ir_update_irte(struct irq_data *irqd, struct amd_iommu *iommu, struct irq_cfg *cfg); static int irq_remapping_activate(struct irq_domain *domain, - struct irq_data *irq_data, bool early) + struct irq_data *irq_data, bool reserve) { struct amd_ir_data *data = irq_data->chip_data; struct irq_2_irte *irte_info = &data->irq_2_irte; diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index f122071688fd..744592d330ca 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1698,13 +1698,15 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain) domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; domain->geometry.aperture_end = (1UL << ias) - 1; domain->geometry.force_aperture = true; - smmu_domain->pgtbl_ops = pgtbl_ops; ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg); - if (ret < 0) + if (ret < 0) { free_io_pgtable_ops(pgtbl_ops); + return ret; + } - return ret; + smmu_domain->pgtbl_ops = pgtbl_ops; + return 0; } static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid) @@ -1731,7 +1733,7 @@ static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid) static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec) { - int i; + int i, j; struct arm_smmu_master_data *master = fwspec->iommu_priv; struct arm_smmu_device *smmu = master->smmu; @@ -1739,6 +1741,13 @@ static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec) u32 sid = fwspec->ids[i]; __le64 *step = arm_smmu_get_step_for_sid(smmu, sid); + /* Bridged PCI devices may end up with duplicated IDs */ + for (j = 0; j < i; j++) + if (fwspec->ids[j] == sid) + break; + if (j < i) + continue; + arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste); } } diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 76a193c7fcfc..66f69af2c219 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -1397,7 +1397,7 @@ static void intel_irq_remapping_free(struct irq_domain *domain, } static int intel_irq_remapping_activate(struct irq_domain *domain, - struct irq_data *irq_data, bool early) + struct irq_data *irq_data, bool reserve) { intel_ir_reconfigure_irte(irq_data, true); return 0; diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 4039e64cd342..06f025fd5726 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -2303,7 +2303,7 @@ static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, } static int its_irq_domain_activate(struct irq_domain *domain, - struct irq_data *d, bool early) + struct irq_data *d, bool reserve) { struct its_device *its_dev = irq_data_get_irq_chip_data(d); u32 event = its_get_event_id(d); @@ -2818,7 +2818,7 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq } static int its_vpe_irq_domain_activate(struct irq_domain *domain, - struct irq_data *d, bool early) + struct irq_data *d, bool reserve) { struct its_vpe *vpe = irq_data_get_irq_chip_data(d); struct its_node *its; diff --git a/drivers/irqchip/irq-renesas-intc-irqpin.c b/drivers/irqchip/irq-renesas-intc-irqpin.c index 06f29cf5018a..cee59fe1321c 100644 --- a/drivers/irqchip/irq-renesas-intc-irqpin.c +++ b/drivers/irqchip/irq-renesas-intc-irqpin.c @@ -342,6 +342,9 @@ static irqreturn_t intc_irqpin_shared_irq_handler(int irq, void *dev_id) */ static struct lock_class_key intc_irqpin_irq_lock_class; +/* And this is for the request mutex */ +static struct lock_class_key intc_irqpin_irq_request_class; + static int intc_irqpin_irq_domain_map(struct irq_domain *h, unsigned int virq, irq_hw_number_t hw) { @@ -352,7 +355,8 @@ static int intc_irqpin_irq_domain_map(struct irq_domain *h, unsigned int virq, intc_irqpin_dbg(&p->irq[hw], "map"); irq_set_chip_data(virq, h->host_data); - irq_set_lockdep_class(virq, &intc_irqpin_irq_lock_class); + irq_set_lockdep_class(virq, &intc_irqpin_irq_lock_class, + &intc_irqpin_irq_request_class); irq_set_chip_and_handler(virq, &p->irq_chip, handle_level_irq); return 0; } diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c index f3654fd2eaf3..ede4fa0ac2cc 100644 --- a/drivers/leds/led-core.c +++ b/drivers/leds/led-core.c @@ -186,8 +186,9 @@ void led_blink_set(struct led_classdev *led_cdev, unsigned long *delay_on, unsigned long *delay_off) { - led_stop_software_blink(led_cdev); + del_timer_sync(&led_cdev->blink_timer); + clear_bit(LED_BLINK_SW, &led_cdev->work_flags); clear_bit(LED_BLINK_ONESHOT, &led_cdev->work_flags); clear_bit(LED_BLINK_ONESHOT_STOP, &led_cdev->work_flags); diff --git a/drivers/mfd/arizona-irq.c b/drivers/mfd/arizona-irq.c index 09cf3699e354..a307832d7e45 100644 --- a/drivers/mfd/arizona-irq.c +++ b/drivers/mfd/arizona-irq.c @@ -184,6 +184,7 @@ static struct irq_chip arizona_irq_chip = { }; static struct lock_class_key arizona_irq_lock_class; +static struct lock_class_key arizona_irq_request_class; static int arizona_irq_map(struct irq_domain *h, unsigned int virq, irq_hw_number_t hw) @@ -191,7 +192,8 @@ static int arizona_irq_map(struct irq_domain *h, unsigned int virq, struct arizona *data = h->host_data; irq_set_chip_data(virq, data); - irq_set_lockdep_class(virq, &arizona_irq_lock_class); + irq_set_lockdep_class(virq, &arizona_irq_lock_class, + &arizona_irq_request_class); irq_set_chip_and_handler(virq, &arizona_irq_chip, handle_simple_irq); irq_set_nested_thread(virq, 1); irq_set_noprobe(virq); diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c index 590fb9aad77d..c3ed885c155c 100644 --- a/drivers/mfd/rtsx_pcr.c +++ b/drivers/mfd/rtsx_pcr.c @@ -1543,6 +1543,9 @@ static void rtsx_pci_shutdown(struct pci_dev *pcidev) rtsx_pci_power_off(pcr, HOST_ENTER_S1); pci_disable_device(pcidev); + free_irq(pcr->irq, (void *)pcr); + if (pcr->msi_en) + pci_disable_msi(pcr->pci); } #else /* CONFIG_PM */ diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c index 90b9a9ccbe60..9285f60e5783 100644 --- a/drivers/mtd/nand/pxa3xx_nand.c +++ b/drivers/mtd/nand/pxa3xx_nand.c @@ -963,6 +963,7 @@ static void prepare_start_command(struct pxa3xx_nand_info *info, int command) switch (command) { case NAND_CMD_READ0: + case NAND_CMD_READOOB: case NAND_CMD_PAGEPROG: info->use_ecc = 1; break; diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 3cd371c94e83..634c51e6b8ae 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -543,7 +543,7 @@ static int flexcan_start_xmit(struct sk_buff *skb, struct net_device *dev) data = be32_to_cpup((__be32 *)&cf->data[0]); priv->write(data, &priv->tx_mb->data[0]); } - if (cf->can_dlc > 3) { + if (cf->can_dlc > 4) { data = be32_to_cpup((__be32 *)&cf->data[4]); priv->write(data, &priv->tx_mb->data[1]); } diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index b00358297424..12ff0020ecd6 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -395,6 +395,7 @@ static void ems_usb_rx_err(struct ems_usb *dev, struct ems_cpc_msg *msg) if (dev->can.state == CAN_STATE_ERROR_WARNING || dev->can.state == CAN_STATE_ERROR_PASSIVE) { + cf->can_id |= CAN_ERR_CRTL; cf->data[1] = (txerr > rxerr) ? CAN_ERR_CRTL_TX_PASSIVE : CAN_ERR_CRTL_RX_PASSIVE; } diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 68ac3e88a8ce..8bf80ad9dc44 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -449,7 +449,7 @@ static int gs_usb_set_bittiming(struct net_device *netdev) dev_err(netdev->dev.parent, "Couldn't set bittimings (err=%d)", rc); - return rc; + return (rc > 0) ? 0 : rc; } static void gs_usb_xmit_callback(struct urb *urb) diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c index 5d1753cfacea..ed6828821fbd 100644 --- a/drivers/net/can/vxcan.c +++ b/drivers/net/can/vxcan.c @@ -194,7 +194,7 @@ static int vxcan_newlink(struct net *net, struct net_device *dev, tbp = peer_tb; } - if (tbp[IFLA_IFNAME]) { + if (ifmp && tbp[IFLA_IFNAME]) { nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ); name_assign_type = NET_NAME_USER; } else { diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 561b05089cb6..db830a1141d9 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1497,10 +1497,13 @@ enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds, int port) { struct b53_device *dev = ds->priv; - /* Older models support a different tag format that we do not - * support in net/dsa/tag_brcm.c yet. + /* Older models (5325, 5365) support a different tag format that we do + * not support in net/dsa/tag_brcm.c yet. 539x and 531x5 require managed + * mode to be turned on which means we need to specifically manage ARL + * misses on multicast addresses (TBD). */ - if (is5325(dev) || is5365(dev) || !b53_can_enable_brcm_tags(ds, port)) + if (is5325(dev) || is5365(dev) || is539x(dev) || is531x5(dev) || + !b53_can_enable_brcm_tags(ds, port)) return DSA_TAG_PROTO_NONE; /* Broadcom BCM58xx chips have a flow accelerator on Port 8 diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c index f4e13a7014bd..36c8950dbd2d 100644 --- a/drivers/net/ethernet/3com/3c59x.c +++ b/drivers/net/ethernet/3com/3c59x.c @@ -602,7 +602,7 @@ struct vortex_private { struct sk_buff* rx_skbuff[RX_RING_SIZE]; struct sk_buff* tx_skbuff[TX_RING_SIZE]; unsigned int cur_rx, cur_tx; /* The next free ring entry */ - unsigned int dirty_rx, dirty_tx; /* The ring entries to be free()ed. */ + unsigned int dirty_tx; /* The ring entries to be free()ed. */ struct vortex_extra_stats xstats; /* NIC-specific extra stats */ struct sk_buff *tx_skb; /* Packet being eaten by bus master ctrl. */ dma_addr_t tx_skb_dma; /* Allocated DMA address for bus master ctrl DMA. */ @@ -618,7 +618,6 @@ struct vortex_private { /* The remainder are related to chip state, mostly media selection. */ struct timer_list timer; /* Media selection timer. */ - struct timer_list rx_oom_timer; /* Rx skb allocation retry timer */ int options; /* User-settable misc. driver options. */ unsigned int media_override:4, /* Passed-in media type. */ default_media:4, /* Read from the EEPROM/Wn3_Config. */ @@ -760,7 +759,6 @@ static void mdio_sync(struct vortex_private *vp, int bits); static int mdio_read(struct net_device *dev, int phy_id, int location); static void mdio_write(struct net_device *vp, int phy_id, int location, int value); static void vortex_timer(struct timer_list *t); -static void rx_oom_timer(struct timer_list *t); static netdev_tx_t vortex_start_xmit(struct sk_buff *skb, struct net_device *dev); static netdev_tx_t boomerang_start_xmit(struct sk_buff *skb, @@ -1601,7 +1599,6 @@ vortex_up(struct net_device *dev) timer_setup(&vp->timer, vortex_timer, 0); mod_timer(&vp->timer, RUN_AT(media_tbl[dev->if_port].wait)); - timer_setup(&vp->rx_oom_timer, rx_oom_timer, 0); if (vortex_debug > 1) pr_debug("%s: Initial media type %s.\n", @@ -1676,7 +1673,7 @@ vortex_up(struct net_device *dev) window_write16(vp, 0x0040, 4, Wn4_NetDiag); if (vp->full_bus_master_rx) { /* Boomerang bus master. */ - vp->cur_rx = vp->dirty_rx = 0; + vp->cur_rx = 0; /* Initialize the RxEarly register as recommended. */ iowrite16(SetRxThreshold + (1536>>2), ioaddr + EL3_CMD); iowrite32(0x0020, ioaddr + PktStatus); @@ -1729,6 +1726,7 @@ vortex_open(struct net_device *dev) struct vortex_private *vp = netdev_priv(dev); int i; int retval; + dma_addr_t dma; /* Use the now-standard shared IRQ implementation. */ if ((retval = request_irq(dev->irq, vp->full_bus_master_rx ? @@ -1753,7 +1751,11 @@ vortex_open(struct net_device *dev) break; /* Bad news! */ skb_reserve(skb, NET_IP_ALIGN); /* Align IP on 16 byte boundaries */ - vp->rx_ring[i].addr = cpu_to_le32(pci_map_single(VORTEX_PCI(vp), skb->data, PKT_BUF_SZ, PCI_DMA_FROMDEVICE)); + dma = pci_map_single(VORTEX_PCI(vp), skb->data, + PKT_BUF_SZ, PCI_DMA_FROMDEVICE); + if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma)) + break; + vp->rx_ring[i].addr = cpu_to_le32(dma); } if (i != RX_RING_SIZE) { pr_emerg("%s: no memory for rx ring\n", dev->name); @@ -2067,6 +2069,12 @@ vortex_start_xmit(struct sk_buff *skb, struct net_device *dev) int len = (skb->len + 3) & ~3; vp->tx_skb_dma = pci_map_single(VORTEX_PCI(vp), skb->data, len, PCI_DMA_TODEVICE); + if (dma_mapping_error(&VORTEX_PCI(vp)->dev, vp->tx_skb_dma)) { + dev_kfree_skb_any(skb); + dev->stats.tx_dropped++; + return NETDEV_TX_OK; + } + spin_lock_irq(&vp->window_lock); window_set(vp, 7); iowrite32(vp->tx_skb_dma, ioaddr + Wn7_MasterAddr); @@ -2593,7 +2601,7 @@ boomerang_rx(struct net_device *dev) int entry = vp->cur_rx % RX_RING_SIZE; void __iomem *ioaddr = vp->ioaddr; int rx_status; - int rx_work_limit = vp->dirty_rx + RX_RING_SIZE - vp->cur_rx; + int rx_work_limit = RX_RING_SIZE; if (vortex_debug > 5) pr_debug("boomerang_rx(): status %4.4x\n", ioread16(ioaddr+EL3_STATUS)); @@ -2614,7 +2622,8 @@ boomerang_rx(struct net_device *dev) } else { /* The packet length: up to 4.5K!. */ int pkt_len = rx_status & 0x1fff; - struct sk_buff *skb; + struct sk_buff *skb, *newskb; + dma_addr_t newdma; dma_addr_t dma = le32_to_cpu(vp->rx_ring[entry].addr); if (vortex_debug > 4) @@ -2633,9 +2642,27 @@ boomerang_rx(struct net_device *dev) pci_dma_sync_single_for_device(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE); vp->rx_copy++; } else { + /* Pre-allocate the replacement skb. If it or its + * mapping fails then recycle the buffer thats already + * in place + */ + newskb = netdev_alloc_skb_ip_align(dev, PKT_BUF_SZ); + if (!newskb) { + dev->stats.rx_dropped++; + goto clear_complete; + } + newdma = pci_map_single(VORTEX_PCI(vp), newskb->data, + PKT_BUF_SZ, PCI_DMA_FROMDEVICE); + if (dma_mapping_error(&VORTEX_PCI(vp)->dev, newdma)) { + dev->stats.rx_dropped++; + consume_skb(newskb); + goto clear_complete; + } + /* Pass up the skbuff already on the Rx ring. */ skb = vp->rx_skbuff[entry]; - vp->rx_skbuff[entry] = NULL; + vp->rx_skbuff[entry] = newskb; + vp->rx_ring[entry].addr = cpu_to_le32(newdma); skb_put(skb, pkt_len); pci_unmap_single(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE); vp->rx_nocopy++; @@ -2653,55 +2680,15 @@ boomerang_rx(struct net_device *dev) netif_rx(skb); dev->stats.rx_packets++; } - entry = (++vp->cur_rx) % RX_RING_SIZE; - } - /* Refill the Rx ring buffers. */ - for (; vp->cur_rx - vp->dirty_rx > 0; vp->dirty_rx++) { - struct sk_buff *skb; - entry = vp->dirty_rx % RX_RING_SIZE; - if (vp->rx_skbuff[entry] == NULL) { - skb = netdev_alloc_skb_ip_align(dev, PKT_BUF_SZ); - if (skb == NULL) { - static unsigned long last_jif; - if (time_after(jiffies, last_jif + 10 * HZ)) { - pr_warn("%s: memory shortage\n", - dev->name); - last_jif = jiffies; - } - if ((vp->cur_rx - vp->dirty_rx) == RX_RING_SIZE) - mod_timer(&vp->rx_oom_timer, RUN_AT(HZ * 1)); - break; /* Bad news! */ - } - vp->rx_ring[entry].addr = cpu_to_le32(pci_map_single(VORTEX_PCI(vp), skb->data, PKT_BUF_SZ, PCI_DMA_FROMDEVICE)); - vp->rx_skbuff[entry] = skb; - } +clear_complete: vp->rx_ring[entry].status = 0; /* Clear complete bit. */ iowrite16(UpUnstall, ioaddr + EL3_CMD); + entry = (++vp->cur_rx) % RX_RING_SIZE; } return 0; } -/* - * If we've hit a total OOM refilling the Rx ring we poll once a second - * for some memory. Otherwise there is no way to restart the rx process. - */ -static void -rx_oom_timer(struct timer_list *t) -{ - struct vortex_private *vp = from_timer(vp, t, rx_oom_timer); - struct net_device *dev = vp->mii.dev; - - spin_lock_irq(&vp->lock); - if ((vp->cur_rx - vp->dirty_rx) == RX_RING_SIZE) /* This test is redundant, but makes me feel good */ - boomerang_rx(dev); - if (vortex_debug > 1) { - pr_debug("%s: rx_oom_timer %s\n", dev->name, - ((vp->cur_rx - vp->dirty_rx) != RX_RING_SIZE) ? "succeeded" : "retrying"); - } - spin_unlock_irq(&vp->lock); -} - static void vortex_down(struct net_device *dev, int final_down) { @@ -2711,7 +2698,6 @@ vortex_down(struct net_device *dev, int final_down) netdev_reset_queue(dev); netif_stop_queue(dev); - del_timer_sync(&vp->rx_oom_timer); del_timer_sync(&vp->timer); /* Turn off statistics ASAP. We update dev->stats below. */ diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index a6f283232cb7..6975150d144e 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -75,6 +75,9 @@ static struct workqueue_struct *ena_wq; MODULE_DEVICE_TABLE(pci, ena_pci_tbl); static int ena_rss_init_default(struct ena_adapter *adapter); +static void check_for_admin_com_state(struct ena_adapter *adapter); +static void ena_destroy_device(struct ena_adapter *adapter); +static int ena_restore_device(struct ena_adapter *adapter); static void ena_tx_timeout(struct net_device *dev) { @@ -1570,7 +1573,7 @@ static int ena_rss_configure(struct ena_adapter *adapter) static int ena_up_complete(struct ena_adapter *adapter) { - int rc, i; + int rc; rc = ena_rss_configure(adapter); if (rc) @@ -1589,17 +1592,6 @@ static int ena_up_complete(struct ena_adapter *adapter) ena_napi_enable_all(adapter); - /* Enable completion queues interrupt */ - for (i = 0; i < adapter->num_queues; i++) - ena_unmask_interrupt(&adapter->tx_ring[i], - &adapter->rx_ring[i]); - - /* schedule napi in case we had pending packets - * from the last time we disable napi - */ - for (i = 0; i < adapter->num_queues; i++) - napi_schedule(&adapter->ena_napi[i].napi); - return 0; } @@ -1736,7 +1728,7 @@ create_err: static int ena_up(struct ena_adapter *adapter) { - int rc; + int rc, i; netdev_dbg(adapter->netdev, "%s\n", __func__); @@ -1779,6 +1771,17 @@ static int ena_up(struct ena_adapter *adapter) set_bit(ENA_FLAG_DEV_UP, &adapter->flags); + /* Enable completion queues interrupt */ + for (i = 0; i < adapter->num_queues; i++) + ena_unmask_interrupt(&adapter->tx_ring[i], + &adapter->rx_ring[i]); + + /* schedule napi in case we had pending packets + * from the last time we disable napi + */ + for (i = 0; i < adapter->num_queues; i++) + napi_schedule(&adapter->ena_napi[i].napi); + return rc; err_up: @@ -1889,6 +1892,17 @@ static int ena_close(struct net_device *netdev) if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) ena_down(adapter); + /* Check for device status and issue reset if needed*/ + check_for_admin_com_state(adapter); + if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) { + netif_err(adapter, ifdown, adapter->netdev, + "Destroy failure, restarting device\n"); + ena_dump_stats_to_dmesg(adapter); + /* rtnl lock already obtained in dev_ioctl() layer */ + ena_destroy_device(adapter); + ena_restore_device(adapter); + } + return 0; } @@ -2549,11 +2563,12 @@ static void ena_destroy_device(struct ena_adapter *adapter) ena_com_set_admin_running_state(ena_dev, false); - ena_close(netdev); + if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) + ena_down(adapter); /* Before releasing the ENA resources, a device reset is required. * (to prevent the device from accessing them). - * In case the reset flag is set and the device is up, ena_close + * In case the reset flag is set and the device is up, ena_down() * already perform the reset, so it can be skipped. */ if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up)) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index 5ee18660bc33..c9617675f934 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -70,7 +70,7 @@ static int bnxt_vf_ndo_prep(struct bnxt *bp, int vf_id) netdev_err(bp->dev, "vf ndo called though sriov is disabled\n"); return -EINVAL; } - if (vf_id >= bp->pf.max_vfs) { + if (vf_id >= bp->pf.active_vfs) { netdev_err(bp->dev, "Invalid VF id %d\n", vf_id); return -EINVAL; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 2ae5ed151369..1d9b08c20f95 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -416,7 +416,7 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow, } /* If all IP and L4 fields are wildcarded then this is an L2 flow */ - if (is_wildcard(&l3_mask, sizeof(l3_mask)) && + if (is_wildcard(l3_mask, sizeof(*l3_mask)) && is_wildcard(&flow->l4_mask, sizeof(flow->l4_mask))) { flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_L2; } else { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 69d0b64e6986..1ff71825868c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -345,7 +345,6 @@ struct adapter_params { unsigned int sf_size; /* serial flash size in bytes */ unsigned int sf_nsec; /* # of flash sectors */ - unsigned int sf_fw_start; /* start of FW image in flash */ unsigned int fw_vers; /* firmware version */ unsigned int bs_vers; /* bootstrap version */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index f2a60e01d563..0e9f64a46ac5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -2847,8 +2847,6 @@ enum { SF_RD_DATA_FAST = 0xb, /* read flash */ SF_RD_ID = 0x9f, /* read ID */ SF_ERASE_SECTOR = 0xd8, /* erase sector */ - - FW_MAX_SIZE = 16 * SF_SEC_SIZE, }; /** @@ -3561,8 +3559,9 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size) const __be32 *p = (const __be32 *)fw_data; const struct fw_hdr *hdr = (const struct fw_hdr *)fw_data; unsigned int sf_sec_size = adap->params.sf_size / adap->params.sf_nsec; - unsigned int fw_img_start = adap->params.sf_fw_start; - unsigned int fw_start_sec = fw_img_start / sf_sec_size; + unsigned int fw_start_sec = FLASH_FW_START_SEC; + unsigned int fw_size = FLASH_FW_MAX_SIZE; + unsigned int fw_start = FLASH_FW_START; if (!size) { dev_err(adap->pdev_dev, "FW image has no data\n"); @@ -3578,9 +3577,9 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size) "FW image size differs from size in FW header\n"); return -EINVAL; } - if (size > FW_MAX_SIZE) { + if (size > fw_size) { dev_err(adap->pdev_dev, "FW image too large, max is %u bytes\n", - FW_MAX_SIZE); + fw_size); return -EFBIG; } if (!t4_fw_matches_chip(adap, hdr)) @@ -3607,11 +3606,11 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size) */ memcpy(first_page, fw_data, SF_PAGE_SIZE); ((struct fw_hdr *)first_page)->fw_ver = cpu_to_be32(0xffffffff); - ret = t4_write_flash(adap, fw_img_start, SF_PAGE_SIZE, first_page); + ret = t4_write_flash(adap, fw_start, SF_PAGE_SIZE, first_page); if (ret) goto out; - addr = fw_img_start; + addr = fw_start; for (size -= SF_PAGE_SIZE; size; size -= SF_PAGE_SIZE) { addr += SF_PAGE_SIZE; fw_data += SF_PAGE_SIZE; @@ -3621,7 +3620,7 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size) } ret = t4_write_flash(adap, - fw_img_start + offsetof(struct fw_hdr, fw_ver), + fw_start + offsetof(struct fw_hdr, fw_ver), sizeof(hdr->fw_ver), (const u8 *)&hdr->fw_ver); out: if (ret) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index e17d10b8b041..90aa69a08922 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3489,6 +3489,10 @@ fec_probe(struct platform_device *pdev) goto failed_regulator; } } else { + if (PTR_ERR(fep->reg_phy) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + goto failed_regulator; + } fep->reg_phy = NULL; } @@ -3572,8 +3576,9 @@ failed_clk_ipg: failed_clk: if (of_phy_is_fixed_link(np)) of_phy_deregister_fixed_link(np); -failed_phy: of_node_put(phy_node); +failed_phy: + dev_id--; failed_ioremap: free_netdev(ndev); diff --git a/drivers/net/ethernet/intel/e1000/e1000.h b/drivers/net/ethernet/intel/e1000/e1000.h index d7bdea79e9fa..8fd2458060a0 100644 --- a/drivers/net/ethernet/intel/e1000/e1000.h +++ b/drivers/net/ethernet/intel/e1000/e1000.h @@ -331,7 +331,8 @@ struct e1000_adapter { enum e1000_state_t { __E1000_TESTING, __E1000_RESETTING, - __E1000_DOWN + __E1000_DOWN, + __E1000_DISABLED }; #undef pr_fmt diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 1982f7917a8d..3dd4aeb2706d 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -945,7 +945,7 @@ static int e1000_init_hw_struct(struct e1000_adapter *adapter, static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct net_device *netdev; - struct e1000_adapter *adapter; + struct e1000_adapter *adapter = NULL; struct e1000_hw *hw; static int cards_found; @@ -955,6 +955,7 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) u16 tmp = 0; u16 eeprom_apme_mask = E1000_EEPROM_APME; int bars, need_ioport; + bool disable_dev = false; /* do not allocate ioport bars when not needed */ need_ioport = e1000_is_need_ioport(pdev); @@ -1259,11 +1260,13 @@ err_mdio_ioremap: iounmap(hw->ce4100_gbe_mdio_base_virt); iounmap(hw->hw_addr); err_ioremap: + disable_dev = !test_and_set_bit(__E1000_DISABLED, &adapter->flags); free_netdev(netdev); err_alloc_etherdev: pci_release_selected_regions(pdev, bars); err_pci_reg: - pci_disable_device(pdev); + if (!adapter || disable_dev) + pci_disable_device(pdev); return err; } @@ -1281,6 +1284,7 @@ static void e1000_remove(struct pci_dev *pdev) struct net_device *netdev = pci_get_drvdata(pdev); struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; + bool disable_dev; e1000_down_and_stop(adapter); e1000_release_manageability(adapter); @@ -1299,9 +1303,11 @@ static void e1000_remove(struct pci_dev *pdev) iounmap(hw->flash_address); pci_release_selected_regions(pdev, adapter->bars); + disable_dev = !test_and_set_bit(__E1000_DISABLED, &adapter->flags); free_netdev(netdev); - pci_disable_device(pdev); + if (disable_dev) + pci_disable_device(pdev); } /** @@ -5156,7 +5162,8 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake) if (netif_running(netdev)) e1000_free_irq(adapter); - pci_disable_device(pdev); + if (!test_and_set_bit(__E1000_DISABLED, &adapter->flags)) + pci_disable_device(pdev); return 0; } @@ -5200,6 +5207,10 @@ static int e1000_resume(struct pci_dev *pdev) pr_err("Cannot enable PCI device from suspend\n"); return err; } + + /* flush memory to make sure state is correct */ + smp_mb__before_atomic(); + clear_bit(__E1000_DISABLED, &adapter->flags); pci_set_master(pdev); pci_enable_wake(pdev, PCI_D3hot, 0); @@ -5274,7 +5285,9 @@ static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev, if (netif_running(netdev)) e1000_down(adapter); - pci_disable_device(pdev); + + if (!test_and_set_bit(__E1000_DISABLED, &adapter->flags)) + pci_disable_device(pdev); /* Request a slot slot reset. */ return PCI_ERS_RESULT_NEED_RESET; @@ -5302,6 +5315,10 @@ static pci_ers_result_t e1000_io_slot_reset(struct pci_dev *pdev) pr_err("Cannot re-enable PCI device after reset.\n"); return PCI_ERS_RESULT_DISCONNECT; } + + /* flush memory to make sure state is correct */ + smp_mb__before_atomic(); + clear_bit(__E1000_DISABLED, &adapter->flags); pci_set_master(pdev); pci_enable_wake(pdev, PCI_D3hot, 0); diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index d6d4ed7acf03..31277d3bb7dc 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1367,6 +1367,9 @@ out: * Checks to see of the link status of the hardware has changed. If a * change in link status has been detected, then we read the PHY registers * to get the current speed/duplex if link exists. + * + * Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link + * up). **/ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) { @@ -1382,7 +1385,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * Change or Rx Sequence Error interrupt. */ if (!mac->get_link_status) - return 0; + return 1; /* First we want to see if the MII Status Register reports * link. If so, then we want to get the current speed/duplex @@ -1613,10 +1616,12 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * different link partner. */ ret_val = e1000e_config_fc_after_link_up(hw); - if (ret_val) + if (ret_val) { e_dbg("Error configuring flow control\n"); + return ret_val; + } - return ret_val; + return 1; } static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 321d8be80871..42dcaefc4c19 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1573,11 +1573,18 @@ static int i40e_set_mac(struct net_device *netdev, void *p) else netdev_info(netdev, "set new mac address %pM\n", addr->sa_data); + /* Copy the address first, so that we avoid a possible race with + * .set_rx_mode(). If we copy after changing the address in the filter + * list, we might open ourselves to a narrow race window where + * .set_rx_mode could delete our dev_addr filter and prevent traffic + * from passing. + */ + ether_addr_copy(netdev->dev_addr, addr->sa_data); + spin_lock_bh(&vsi->mac_filter_hash_lock); i40e_del_mac_filter(vsi, netdev->dev_addr); i40e_add_mac_filter(vsi, addr->sa_data); spin_unlock_bh(&vsi->mac_filter_hash_lock); - ether_addr_copy(netdev->dev_addr, addr->sa_data); if (vsi->type == I40E_VSI_MAIN) { i40e_status ret; @@ -1923,6 +1930,14 @@ static int i40e_addr_unsync(struct net_device *netdev, const u8 *addr) struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; + /* Under some circumstances, we might receive a request to delete + * our own device address from our uc list. Because we store the + * device address in the VSI's MAC/VLAN filter list, we need to ignore + * such requests and not delete our device address from this list. + */ + if (ether_addr_equal(addr, netdev->dev_addr)) + return 0; + i40e_del_mac_filter(vsi, addr); return 0; @@ -6038,8 +6053,8 @@ static int i40e_validate_and_set_switch_mode(struct i40e_vsi *vsi) /* Set Bit 7 to be valid */ mode = I40E_AQ_SET_SWITCH_BIT7_VALID; - /* Set L4type to both TCP and UDP support */ - mode |= I40E_AQ_SET_SWITCH_L4_TYPE_BOTH; + /* Set L4type for TCP support */ + mode |= I40E_AQ_SET_SWITCH_L4_TYPE_TCP; /* Set cloud filter mode */ mode |= I40E_AQ_SET_SWITCH_MODE_NON_TUNNEL; @@ -6969,18 +6984,18 @@ static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, is_valid_ether_addr(filter->src_mac)) || (is_multicast_ether_addr(filter->dst_mac) && is_multicast_ether_addr(filter->src_mac))) - return -EINVAL; + return -EOPNOTSUPP; - /* Make sure port is specified, otherwise bail out, for channel - * specific cloud filter needs 'L4 port' to be non-zero + /* Big buffer cloud filter needs 'L4 port' to be non-zero. Also, UDP + * ports are not supported via big buffer now. */ - if (!filter->dst_port) - return -EINVAL; + if (!filter->dst_port || filter->ip_proto == IPPROTO_UDP) + return -EOPNOTSUPP; /* adding filter using src_port/src_ip is not supported at this stage */ if (filter->src_port || filter->src_ipv4 || !ipv6_addr_any(&filter->ip.v6.src_ip6)) - return -EINVAL; + return -EOPNOTSUPP; /* copy element needed to add cloud filter from filter */ i40e_set_cld_element(filter, &cld_filter.element); @@ -6991,7 +7006,7 @@ static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, is_multicast_ether_addr(filter->src_mac)) { /* MAC + IP : unsupported mode */ if (filter->dst_ipv4) - return -EINVAL; + return -EOPNOTSUPP; /* since we validated that L4 port must be valid before * we get here, start with respective "flags" value @@ -7356,7 +7371,7 @@ static int i40e_configure_clsflower(struct i40e_vsi *vsi, if (tc < 0) { dev_err(&vsi->back->pdev->dev, "Invalid traffic class\n"); - return -EINVAL; + return -EOPNOTSUPP; } if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) || diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 2a8a85e3ae8f..40edb6e5e6f6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -3061,10 +3061,30 @@ bool __i40e_chk_linearize(struct sk_buff *skb) /* Walk through fragments adding latest fragment, testing it, and * then removing stale fragments from the sum. */ - stale = &skb_shinfo(skb)->frags[0]; - for (;;) { + for (stale = &skb_shinfo(skb)->frags[0];; stale++) { + int stale_size = skb_frag_size(stale); + sum += skb_frag_size(frag++); + /* The stale fragment may present us with a smaller + * descriptor than the actual fragment size. To account + * for that we need to remove all the data on the front and + * figure out what the remainder would be in the last + * descriptor associated with the fragment. + */ + if (stale_size > I40E_MAX_DATA_PER_TXD) { + int align_pad = -(stale->page_offset) & + (I40E_MAX_READ_REQ_SIZE - 1); + + sum -= align_pad; + stale_size -= align_pad; + + do { + sum -= I40E_MAX_DATA_PER_TXD_ALIGNED; + stale_size -= I40E_MAX_DATA_PER_TXD_ALIGNED; + } while (stale_size > I40E_MAX_DATA_PER_TXD); + } + /* if sum is negative we failed to make sufficient progress */ if (sum < 0) return true; @@ -3072,7 +3092,7 @@ bool __i40e_chk_linearize(struct sk_buff *skb) if (!nr_frags--) break; - sum -= skb_frag_size(stale++); + sum -= stale_size; } return false; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 50864f99446d..1ba29bb85b67 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -2012,10 +2012,30 @@ bool __i40evf_chk_linearize(struct sk_buff *skb) /* Walk through fragments adding latest fragment, testing it, and * then removing stale fragments from the sum. */ - stale = &skb_shinfo(skb)->frags[0]; - for (;;) { + for (stale = &skb_shinfo(skb)->frags[0];; stale++) { + int stale_size = skb_frag_size(stale); + sum += skb_frag_size(frag++); + /* The stale fragment may present us with a smaller + * descriptor than the actual fragment size. To account + * for that we need to remove all the data on the front and + * figure out what the remainder would be in the last + * descriptor associated with the fragment. + */ + if (stale_size > I40E_MAX_DATA_PER_TXD) { + int align_pad = -(stale->page_offset) & + (I40E_MAX_READ_REQ_SIZE - 1); + + sum -= align_pad; + stale_size -= align_pad; + + do { + sum -= I40E_MAX_DATA_PER_TXD_ALIGNED; + stale_size -= I40E_MAX_DATA_PER_TXD_ALIGNED; + } while (stale_size > I40E_MAX_DATA_PER_TXD); + } + /* if sum is negative we failed to make sufficient progress */ if (sum < 0) return true; @@ -2023,7 +2043,7 @@ bool __i40evf_chk_linearize(struct sk_buff *skb) if (!nr_frags--) break; - sum -= skb_frag_size(stale++); + sum -= stale_size; } return false; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index d373df7b11bd..aa96cc6a0fa6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4422,7 +4422,10 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, } if (!info->linking) break; - if (netdev_has_any_upper_dev(upper_dev)) { + if (netdev_has_any_upper_dev(upper_dev) && + (!netif_is_bridge_master(upper_dev) || + !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, + upper_dev))) { NL_SET_ERR_MSG(extack, "spectrum: Enslaving a port to a device that already has an upper device is not supported"); return -EINVAL; @@ -4550,6 +4553,7 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev, u16 vid) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct netdev_notifier_changeupper_info *info = ptr; struct netlink_ext_ack *extack; struct net_device *upper_dev; @@ -4566,7 +4570,10 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev, } if (!info->linking) break; - if (netdev_has_any_upper_dev(upper_dev)) { + if (netdev_has_any_upper_dev(upper_dev) && + (!netif_is_bridge_master(upper_dev) || + !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, + upper_dev))) { NL_SET_ERR_MSG(extack, "spectrum: Enslaving a port to a device that already has an upper device is not supported"); return -EINVAL; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index a0adcd886589..346b8b688b6f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -366,6 +366,8 @@ int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port, void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *brport_dev, struct net_device *br_dev); +bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev); /* spectrum.c */ int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index be657b8533f0..434b3922b34f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3228,7 +3228,7 @@ static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh, { if (!removing) nh->should_offload = 1; - else if (nh->offloaded) + else nh->should_offload = 0; nh->update = 1; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 7b8548e25ae7..593ad31be749 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -152,6 +152,12 @@ mlxsw_sp_bridge_device_find(const struct mlxsw_sp_bridge *bridge, return NULL; } +bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev) +{ + return !!mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); +} + static struct mlxsw_sp_bridge_device * mlxsw_sp_bridge_device_create(struct mlxsw_sp_bridge *bridge, struct net_device *br_dev) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index d47bbbb22e7c..7aa1c12750b3 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -147,7 +147,7 @@ static const u16 sh_eth_offset_gigabit[SH_ETH_MAX_REGISTER_OFFSET] = { [FWNLCR0] = 0x0090, [FWALCR0] = 0x0094, [TXNLCR1] = 0x00a0, - [TXALCR1] = 0x00a0, + [TXALCR1] = 0x00a4, [RXNLCR1] = 0x00a8, [RXALCR1] = 0x00ac, [FWNLCR1] = 0x00b0, @@ -399,7 +399,7 @@ static const u16 sh_eth_offset_fast_sh3_sh2[SH_ETH_MAX_REGISTER_OFFSET] = { [FWNLCR0] = 0x0090, [FWALCR0] = 0x0094, [TXNLCR1] = 0x00a0, - [TXALCR1] = 0x00a0, + [TXALCR1] = 0x00a4, [RXNLCR1] = 0x00a8, [RXALCR1] = 0x00ac, [FWNLCR1] = 0x00b0, @@ -3225,18 +3225,37 @@ static int sh_eth_drv_probe(struct platform_device *pdev) /* ioremap the TSU registers */ if (mdp->cd->tsu) { struct resource *rtsu; + rtsu = platform_get_resource(pdev, IORESOURCE_MEM, 1); - mdp->tsu_addr = devm_ioremap_resource(&pdev->dev, rtsu); - if (IS_ERR(mdp->tsu_addr)) { - ret = PTR_ERR(mdp->tsu_addr); + if (!rtsu) { + dev_err(&pdev->dev, "no TSU resource\n"); + ret = -ENODEV; + goto out_release; + } + /* We can only request the TSU region for the first port + * of the two sharing this TSU for the probe to succeed... + */ + if (devno % 2 == 0 && + !devm_request_mem_region(&pdev->dev, rtsu->start, + resource_size(rtsu), + dev_name(&pdev->dev))) { + dev_err(&pdev->dev, "can't request TSU resource.\n"); + ret = -EBUSY; + goto out_release; + } + mdp->tsu_addr = devm_ioremap(&pdev->dev, rtsu->start, + resource_size(rtsu)); + if (!mdp->tsu_addr) { + dev_err(&pdev->dev, "TSU region ioremap() failed.\n"); + ret = -ENOMEM; goto out_release; } mdp->port = devno % 2; ndev->features = NETIF_F_HW_VLAN_CTAG_FILTER; } - /* initialize first or needed device */ - if (!devno || pd->needs_init) { + /* Need to init only the first port of the two sharing a TSU */ + if (devno % 2 == 0) { if (mdp->cd->chip_reset) mdp->cd->chip_reset(ndev); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index d9c98fd810bb..f99f14c35063 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -364,9 +364,15 @@ static void stmmac_eee_ctrl_timer(struct timer_list *t) bool stmmac_eee_init(struct stmmac_priv *priv) { struct net_device *ndev = priv->dev; + int interface = priv->plat->interface; unsigned long flags; bool ret = false; + if ((interface != PHY_INTERFACE_MODE_MII) && + (interface != PHY_INTERFACE_MODE_GMII) && + !phy_interface_mode_is_rgmii(interface)) + goto out; + /* Using PCS we cannot dial with the phy registers at this stage * so we do not support extra feature like EEE. */ diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 667c44f68dbc..195e0d0add8d 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -825,6 +825,13 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (IS_ERR(rt)) return PTR_ERR(rt); + if (skb_dst(skb)) { + int mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr) - + GENEVE_BASE_HLEN - info->options_len - 14; + + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + } + sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); if (geneve->collect_md) { tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); @@ -864,6 +871,13 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (IS_ERR(dst)) return PTR_ERR(dst); + if (skb_dst(skb)) { + int mtu = dst_mtu(dst) - sizeof(struct ipv6hdr) - + GENEVE_BASE_HLEN - info->options_len - 14; + + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + } + sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); if (geneve->collect_md) { prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index a178c5efd33e..a0f2be81d52e 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1444,9 +1444,14 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, return 0; unregister_netdev: + /* macvlan_uninit would free the macvlan port */ unregister_netdevice(dev); + return err; destroy_macvlan_port: - if (create) + /* the macvlan port may be freed by macvlan_uninit when fail to register. + * so we destroy the macvlan port only when it's valid. + */ + if (create && macvlan_port_get_rtnl(dev)) macvlan_port_destroy(port->dev); return err; } diff --git a/drivers/net/phy/mdio-sun4i.c b/drivers/net/phy/mdio-sun4i.c index 135296508a7e..6425ce04d3f9 100644 --- a/drivers/net/phy/mdio-sun4i.c +++ b/drivers/net/phy/mdio-sun4i.c @@ -118,8 +118,10 @@ static int sun4i_mdio_probe(struct platform_device *pdev) data->regulator = devm_regulator_get(&pdev->dev, "phy"); if (IS_ERR(data->regulator)) { - if (PTR_ERR(data->regulator) == -EPROBE_DEFER) - return -EPROBE_DEFER; + if (PTR_ERR(data->regulator) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + goto err_out_free_mdiobus; + } dev_info(&pdev->dev, "no regulator found\n"); data->regulator = NULL; diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index d1f9466f2fbf..6ac8b29b2dc3 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1525,6 +1525,7 @@ int phylink_mii_ioctl(struct phylink *pl, struct ifreq *ifr, int cmd) switch (cmd) { case SIOCGMIIPHY: mii->phy_id = pl->phydev->mdio.addr; + /* fall through */ case SIOCGMIIREG: ret = phylink_phy_read(pl, mii->phy_id, mii->reg_num); @@ -1547,6 +1548,7 @@ int phylink_mii_ioctl(struct phylink *pl, struct ifreq *ifr, int cmd) switch (cmd) { case SIOCGMIIPHY: mii->phy_id = 0; + /* fall through */ case SIOCGMIIREG: ret = phylink_mii_read(pl, mii->phy_id, mii->reg_num); @@ -1659,9 +1661,8 @@ static void phylink_sfp_link_down(void *upstream) ASSERT_RTNL(); set_bit(PHYLINK_DISABLE_LINK, &pl->phylink_disable_state); + queue_work(system_power_efficient_wq, &pl->resolve); flush_work(&pl->resolve); - - netif_carrier_off(pl->netdev); } static void phylink_sfp_link_up(void *upstream) diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index 3ecc378e0716..bdc4bb3c8288 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -489,7 +489,8 @@ EXPORT_SYMBOL_GPL(sfp_register_upstream); void sfp_unregister_upstream(struct sfp_bus *bus) { rtnl_lock(); - sfp_unregister_bus(bus); + if (bus->sfp) + sfp_unregister_bus(bus); bus->upstream = NULL; bus->netdev = NULL; rtnl_unlock(); @@ -592,7 +593,8 @@ EXPORT_SYMBOL_GPL(sfp_register_socket); void sfp_unregister_socket(struct sfp_bus *bus) { rtnl_lock(); - sfp_unregister_bus(bus); + if (bus->netdev) + sfp_unregister_bus(bus); bus->sfp_dev = NULL; bus->sfp = NULL; bus->socket_ops = NULL; diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index cfaa07f230e5..ae0580b577b8 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1100,6 +1100,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x05c6, 0x9084, 4)}, {QMI_FIXED_INTF(0x05c6, 0x920d, 0)}, {QMI_FIXED_INTF(0x05c6, 0x920d, 5)}, + {QMI_QUIRK_SET_DTR(0x05c6, 0x9625, 4)}, /* YUGA CLM920-NC5 */ {QMI_FIXED_INTF(0x0846, 0x68a2, 8)}, {QMI_FIXED_INTF(0x12d1, 0x140c, 1)}, /* Huawei E173 */ {QMI_FIXED_INTF(0x12d1, 0x14ac, 1)}, /* Huawei E1820 */ diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index c5a34671abda..9bd7ddeeb6a5 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1326,6 +1326,7 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev) netif_carrier_off(netdev); + xenbus_switch_state(dev, XenbusStateInitialising); return netdev; exit: diff --git a/drivers/nvmem/meson-mx-efuse.c b/drivers/nvmem/meson-mx-efuse.c index a346b4923550..41d3a3c1104e 100644 --- a/drivers/nvmem/meson-mx-efuse.c +++ b/drivers/nvmem/meson-mx-efuse.c @@ -156,8 +156,8 @@ static int meson_mx_efuse_read(void *context, unsigned int offset, MESON_MX_EFUSE_CNTL1_AUTO_RD_ENABLE, MESON_MX_EFUSE_CNTL1_AUTO_RD_ENABLE); - for (i = offset; i < offset + bytes; i += efuse->config.word_size) { - addr = i / efuse->config.word_size; + for (i = 0; i < bytes; i += efuse->config.word_size) { + addr = (offset + i) / efuse->config.word_size; err = meson_mx_efuse_read_addr(efuse, addr, &tmp); if (err) diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c index 0b3fb99d9b89..7390fb8ca9d1 100644 --- a/drivers/parisc/dino.c +++ b/drivers/parisc/dino.c @@ -303,7 +303,7 @@ static void dino_mask_irq(struct irq_data *d) struct dino_device *dino_dev = irq_data_get_irq_chip_data(d); int local_irq = gsc_find_local_irq(d->irq, dino_dev->global_irq, DINO_LOCAL_IRQS); - DBG(KERN_WARNING "%s(0x%p, %d)\n", __func__, dino_dev, d->irq); + DBG(KERN_WARNING "%s(0x%px, %d)\n", __func__, dino_dev, d->irq); /* Clear the matching bit in the IMR register */ dino_dev->imr &= ~(DINO_MASK_IRQ(local_irq)); @@ -316,7 +316,7 @@ static void dino_unmask_irq(struct irq_data *d) int local_irq = gsc_find_local_irq(d->irq, dino_dev->global_irq, DINO_LOCAL_IRQS); u32 tmp; - DBG(KERN_WARNING "%s(0x%p, %d)\n", __func__, dino_dev, d->irq); + DBG(KERN_WARNING "%s(0x%px, %d)\n", __func__, dino_dev, d->irq); /* ** clear pending IRQ bits @@ -396,7 +396,7 @@ ilr_again: if (mask) { if (--ilr_loop > 0) goto ilr_again; - printk(KERN_ERR "Dino 0x%p: stuck interrupt %d\n", + printk(KERN_ERR "Dino 0x%px: stuck interrupt %d\n", dino_dev->hba.base_addr, mask); return IRQ_NONE; } @@ -553,7 +553,7 @@ dino_fixup_bus(struct pci_bus *bus) struct pci_dev *dev; struct dino_device *dino_dev = DINO_DEV(parisc_walk_tree(bus->bridge)); - DBG(KERN_WARNING "%s(0x%p) bus %d platform_data 0x%p\n", + DBG(KERN_WARNING "%s(0x%px) bus %d platform_data 0x%px\n", __func__, bus, bus->busn_res.start, bus->bridge->platform_data); @@ -854,7 +854,7 @@ static int __init dino_common_init(struct parisc_device *dev, res->flags = IORESOURCE_IO; /* do not mark it busy ! */ if (request_resource(&ioport_resource, res) < 0) { printk(KERN_ERR "%s: request I/O Port region failed " - "0x%lx/%lx (hpa 0x%p)\n", + "0x%lx/%lx (hpa 0x%px)\n", name, (unsigned long)res->start, (unsigned long)res->end, dino_dev->hba.base_addr); return 1; diff --git a/drivers/parisc/eisa_eeprom.c b/drivers/parisc/eisa_eeprom.c index 4dd9b1308128..99a80da6fd2e 100644 --- a/drivers/parisc/eisa_eeprom.c +++ b/drivers/parisc/eisa_eeprom.c @@ -106,7 +106,7 @@ static int __init eisa_eeprom_init(void) return retval; } - printk(KERN_INFO "EISA EEPROM at 0x%p\n", eisa_eeprom_addr); + printk(KERN_INFO "EISA EEPROM at 0x%px\n", eisa_eeprom_addr); return 0; } diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c index 04dac6a42c9f..6b8d060d07de 100644 --- a/drivers/pci/host/pci-hyperv.c +++ b/drivers/pci/host/pci-hyperv.c @@ -985,9 +985,7 @@ static u32 hv_compose_msi_req_v1( int_pkt->wslot.slot = slot; int_pkt->int_desc.vector = vector; int_pkt->int_desc.vector_count = 1; - int_pkt->int_desc.delivery_mode = - (apic->irq_delivery_mode == dest_LowestPrio) ? - dest_LowestPrio : dest_Fixed; + int_pkt->int_desc.delivery_mode = dest_Fixed; /* * Create MSI w/ dummy vCPU set, overwritten by subsequent retarget in @@ -1008,9 +1006,7 @@ static u32 hv_compose_msi_req_v2( int_pkt->wslot.slot = slot; int_pkt->int_desc.vector = vector; int_pkt->int_desc.vector_count = 1; - int_pkt->int_desc.delivery_mode = - (apic->irq_delivery_mode == dest_LowestPrio) ? - dest_LowestPrio : dest_Fixed; + int_pkt->int_desc.delivery_mode = dest_Fixed; /* * Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten diff --git a/drivers/phy/motorola/phy-cpcap-usb.c b/drivers/phy/motorola/phy-cpcap-usb.c index accaaaccb662..6601ad0dfb3a 100644 --- a/drivers/phy/motorola/phy-cpcap-usb.c +++ b/drivers/phy/motorola/phy-cpcap-usb.c @@ -310,7 +310,7 @@ static int cpcap_usb_init_irq(struct platform_device *pdev, int irq, error; irq = platform_get_irq_byname(pdev, name); - if (!irq) + if (irq < 0) return -ENODEV; error = devm_request_threaded_irq(ddata->dev, irq, NULL, diff --git a/drivers/phy/renesas/Kconfig b/drivers/phy/renesas/Kconfig index cb09245e9b4c..c845facacb06 100644 --- a/drivers/phy/renesas/Kconfig +++ b/drivers/phy/renesas/Kconfig @@ -12,7 +12,9 @@ config PHY_RCAR_GEN3_USB2 tristate "Renesas R-Car generation 3 USB 2.0 PHY driver" depends on ARCH_RENESAS depends on EXTCON + depends on USB_SUPPORT select GENERIC_PHY + select USB_COMMON help Support for USB 2.0 PHY found on Renesas R-Car generation 3 SoCs. diff --git a/drivers/phy/rockchip/phy-rockchip-typec.c b/drivers/phy/rockchip/phy-rockchip-typec.c index ee85fa0ca4b0..7492c8978217 100644 --- a/drivers/phy/rockchip/phy-rockchip-typec.c +++ b/drivers/phy/rockchip/phy-rockchip-typec.c @@ -1137,6 +1137,7 @@ static int rockchip_typec_phy_probe(struct platform_device *pdev) if (IS_ERR(phy)) { dev_err(dev, "failed to create phy: %s\n", child_np->name); + pm_runtime_disable(dev); return PTR_ERR(phy); } @@ -1146,6 +1147,7 @@ static int rockchip_typec_phy_probe(struct platform_device *pdev) phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); if (IS_ERR(phy_provider)) { dev_err(dev, "Failed to register phy provider\n"); + pm_runtime_disable(dev); return PTR_ERR(phy_provider); } diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c index 4307bf0013e1..63e916d4d069 100644 --- a/drivers/phy/tegra/xusb.c +++ b/drivers/phy/tegra/xusb.c @@ -75,14 +75,14 @@ MODULE_DEVICE_TABLE(of, tegra_xusb_padctl_of_match); static struct device_node * tegra_xusb_find_pad_node(struct tegra_xusb_padctl *padctl, const char *name) { - /* - * of_find_node_by_name() drops a reference, so make sure to grab one. - */ - struct device_node *np = of_node_get(padctl->dev->of_node); + struct device_node *pads, *np; + + pads = of_get_child_by_name(padctl->dev->of_node, "pads"); + if (!pads) + return NULL; - np = of_find_node_by_name(np, "pads"); - if (np) - np = of_find_node_by_name(np, name); + np = of_get_child_by_name(pads, name); + of_node_put(pads); return np; } @@ -90,16 +90,16 @@ tegra_xusb_find_pad_node(struct tegra_xusb_padctl *padctl, const char *name) static struct device_node * tegra_xusb_pad_find_phy_node(struct tegra_xusb_pad *pad, unsigned int index) { - /* - * of_find_node_by_name() drops a reference, so make sure to grab one. - */ - struct device_node *np = of_node_get(pad->dev.of_node); + struct device_node *np, *lanes; - np = of_find_node_by_name(np, "lanes"); - if (!np) + lanes = of_get_child_by_name(pad->dev.of_node, "lanes"); + if (!lanes) return NULL; - return of_find_node_by_name(np, pad->soc->lanes[index].name); + np = of_get_child_by_name(lanes, pad->soc->lanes[index].name); + of_node_put(lanes); + + return np; } static int @@ -195,7 +195,7 @@ int tegra_xusb_pad_register(struct tegra_xusb_pad *pad, unsigned int i; int err; - children = of_find_node_by_name(pad->dev.of_node, "lanes"); + children = of_get_child_by_name(pad->dev.of_node, "lanes"); if (!children) return -ENODEV; @@ -444,21 +444,21 @@ static struct device_node * tegra_xusb_find_port_node(struct tegra_xusb_padctl *padctl, const char *type, unsigned int index) { - /* - * of_find_node_by_name() drops a reference, so make sure to grab one. - */ - struct device_node *np = of_node_get(padctl->dev->of_node); + struct device_node *ports, *np; + char *name; - np = of_find_node_by_name(np, "ports"); - if (np) { - char *name; + ports = of_get_child_by_name(padctl->dev->of_node, "ports"); + if (!ports) + return NULL; - name = kasprintf(GFP_KERNEL, "%s-%u", type, index); - if (!name) - return ERR_PTR(-ENOMEM); - np = of_find_node_by_name(np, name); - kfree(name); + name = kasprintf(GFP_KERNEL, "%s-%u", type, index); + if (!name) { + of_node_put(ports); + return ERR_PTR(-ENOMEM); } + np = of_get_child_by_name(ports, name); + kfree(name); + of_node_put(ports); return np; } @@ -847,7 +847,7 @@ static void tegra_xusb_remove_ports(struct tegra_xusb_padctl *padctl) static int tegra_xusb_padctl_probe(struct platform_device *pdev) { - struct device_node *np = of_node_get(pdev->dev.of_node); + struct device_node *np = pdev->dev.of_node; const struct tegra_xusb_padctl_soc *soc; struct tegra_xusb_padctl *padctl; const struct of_device_id *match; @@ -855,7 +855,7 @@ static int tegra_xusb_padctl_probe(struct platform_device *pdev) int err; /* for backwards compatibility with old device trees */ - np = of_find_node_by_name(np, "pads"); + np = of_get_child_by_name(np, "pads"); if (!np) { dev_warn(&pdev->dev, "deprecated DT, using legacy driver\n"); return tegra_xusb_padctl_legacy_probe(pdev); diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index e6cd8de793e2..3501491e5bfc 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -222,6 +222,9 @@ static enum pin_config_param pcs_bias[] = { */ static struct lock_class_key pcs_lock_class; +/* Class for the IRQ request mutex */ +static struct lock_class_key pcs_request_class; + /* * REVISIT: Reads and writes could eventually use regmap or something * generic. But at least on omaps, some mux registers are performance @@ -1486,7 +1489,7 @@ static int pcs_irqdomain_map(struct irq_domain *d, unsigned int irq, irq_set_chip_data(irq, pcs_soc); irq_set_chip_and_handler(irq, &pcs->chip, handle_level_irq); - irq_set_lockdep_class(irq, &pcs_lock_class); + irq_set_lockdep_class(irq, &pcs_lock_class, &pcs_request_class); irq_set_noprobe(irq); return 0; diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index a276c61be217..e62ab087bfd8 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -290,7 +290,7 @@ static int stm32_gpio_domain_translate(struct irq_domain *d, } static int stm32_gpio_domain_activate(struct irq_domain *d, - struct irq_data *irq_data, bool early) + struct irq_data *irq_data, bool reserve) { struct stm32_gpio_bank *bank = d->host_data; struct stm32_pinctrl *pctl = dev_get_drvdata(bank->gpio_chip.parent); diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index 791449a2370f..daa68acbc900 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -1458,5 +1458,5 @@ static void __exit acpi_wmi_exit(void) class_unregister(&wmi_bus_class); } -subsys_initcall(acpi_wmi_init); +subsys_initcall_sync(acpi_wmi_init); module_exit(acpi_wmi_exit); diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c index c94b606e0df8..ee14d8e45c97 100644 --- a/drivers/s390/block/dasd_3990_erp.c +++ b/drivers/s390/block/dasd_3990_erp.c @@ -2803,6 +2803,16 @@ dasd_3990_erp_action(struct dasd_ccw_req * cqr) erp = dasd_3990_erp_handle_match_erp(cqr, erp); } + + /* + * For path verification work we need to stick with the path that was + * originally chosen so that the per path configuration data is + * assigned correctly. + */ + if (test_bit(DASD_CQR_VERIFY_PATH, &erp->flags) && cqr->lpm) { + erp->lpm = cqr->lpm; + } + if (device->features & DASD_FEATURE_ERPLOG) { /* print current erp_chain */ dev_err(&device->cdev->dev, diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile index 05ac6ba15a53..614b44e70a28 100644 --- a/drivers/s390/char/Makefile +++ b/drivers/s390/char/Makefile @@ -17,6 +17,8 @@ CFLAGS_REMOVE_sclp_early_core.o += $(CC_FLAGS_MARCH) CFLAGS_sclp_early_core.o += -march=z900 endif +CFLAGS_sclp_early_core.o += -D__NO_FORTIFY + obj-y += ctrlchar.o keyboard.o defkeymap.o sclp.o sclp_rw.o sclp_quiesce.o \ sclp_cmd.o sclp_config.o sclp_cpi_sys.o sclp_ocf.o sclp_ctl.o \ sclp_early.o sclp_early_core.o diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index a9996c16f4ae..26ce17178401 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -1415,7 +1415,10 @@ static void __scsi_remove_target(struct scsi_target *starget) * check. */ if (sdev->channel != starget->channel || - sdev->id != starget->id || + sdev->id != starget->id) + continue; + if (sdev->sdev_state == SDEV_DEL || + sdev->sdev_state == SDEV_CANCEL || !get_device(&sdev->sdev_gendev)) continue; spin_unlock_irqrestore(shost->host_lock, flags); diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 1b06cf0375dc..3b3d1d050cac 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -953,10 +953,11 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb, case TEST_UNIT_READY: break; default: - set_host_byte(scmnd, DID_TARGET_FAILURE); + set_host_byte(scmnd, DID_ERROR); } break; case SRB_STATUS_INVALID_LUN: + set_host_byte(scmnd, DID_NO_CONNECT); do_work = true; process_err_fn = storvsc_remove_lun; break; diff --git a/drivers/staging/android/ion/Kconfig b/drivers/staging/android/ion/Kconfig index a517b2d29f1b..8f6494158d3d 100644 --- a/drivers/staging/android/ion/Kconfig +++ b/drivers/staging/android/ion/Kconfig @@ -37,7 +37,7 @@ config ION_CHUNK_HEAP config ION_CMA_HEAP bool "Ion CMA heap support" - depends on ION && CMA + depends on ION && DMA_CMA help Choose this option to enable CMA heaps with Ion. This heap is backed by the Contiguous Memory Allocator (CMA). If your system has these diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c index a7d9b0e98572..f480885e346b 100644 --- a/drivers/staging/android/ion/ion.c +++ b/drivers/staging/android/ion/ion.c @@ -346,7 +346,7 @@ static int ion_dma_buf_begin_cpu_access(struct dma_buf *dmabuf, mutex_lock(&buffer->lock); list_for_each_entry(a, &buffer->attachments, list) { dma_sync_sg_for_cpu(a->dev, a->table->sgl, a->table->nents, - DMA_BIDIRECTIONAL); + direction); } mutex_unlock(&buffer->lock); @@ -368,7 +368,7 @@ static int ion_dma_buf_end_cpu_access(struct dma_buf *dmabuf, mutex_lock(&buffer->lock); list_for_each_entry(a, &buffer->attachments, list) { dma_sync_sg_for_device(a->dev, a->table->sgl, a->table->nents, - DMA_BIDIRECTIONAL); + direction); } mutex_unlock(&buffer->lock); diff --git a/drivers/staging/android/ion/ion_cma_heap.c b/drivers/staging/android/ion/ion_cma_heap.c index dd5545d9990a..86196ffd2faf 100644 --- a/drivers/staging/android/ion/ion_cma_heap.c +++ b/drivers/staging/android/ion/ion_cma_heap.c @@ -39,9 +39,15 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer, struct ion_cma_heap *cma_heap = to_cma_heap(heap); struct sg_table *table; struct page *pages; + unsigned long size = PAGE_ALIGN(len); + unsigned long nr_pages = size >> PAGE_SHIFT; + unsigned long align = get_order(size); int ret; - pages = cma_alloc(cma_heap->cma, len, 0, GFP_KERNEL); + if (align > CONFIG_CMA_ALIGNMENT) + align = CONFIG_CMA_ALIGNMENT; + + pages = cma_alloc(cma_heap->cma, nr_pages, align, GFP_KERNEL); if (!pages) return -ENOMEM; @@ -53,7 +59,7 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer, if (ret) goto free_mem; - sg_set_page(table->sgl, pages, len, 0); + sg_set_page(table->sgl, pages, size, 0); buffer->priv_virt = pages; buffer->sg_table = table; @@ -62,7 +68,7 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer, free_mem: kfree(table); err: - cma_release(cma_heap->cma, pages, buffer->size); + cma_release(cma_heap->cma, pages, nr_pages); return -ENOMEM; } @@ -70,9 +76,10 @@ static void ion_cma_free(struct ion_buffer *buffer) { struct ion_cma_heap *cma_heap = to_cma_heap(buffer->heap); struct page *pages = buffer->priv_virt; + unsigned long nr_pages = PAGE_ALIGN(buffer->size) >> PAGE_SHIFT; /* release memory */ - cma_release(cma_heap->cma, pages, buffer->size); + cma_release(cma_heap->cma, pages, nr_pages); /* release sg table */ sg_free_table(buffer->sg_table); kfree(buffer->sg_table); diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c index 986c2a40d978..8267119ccc8e 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c @@ -487,21 +487,18 @@ ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ipaddr, ksocknal_nid2peerlist(id.nid)); } - route2 = NULL; list_for_each_entry(route2, &peer->ksnp_routes, ksnr_list) { - if (route2->ksnr_ipaddr == ipaddr) - break; - - route2 = NULL; - } - if (!route2) { - ksocknal_add_route_locked(peer, route); - route->ksnr_share_count++; - } else { - ksocknal_route_decref(route); - route2->ksnr_share_count++; + if (route2->ksnr_ipaddr == ipaddr) { + /* Route already exists, use the old one */ + ksocknal_route_decref(route); + route2->ksnr_share_count++; + goto out; + } } - + /* Route doesn't already exist, add the new one */ + ksocknal_add_route_locked(peer, route); + route->ksnr_share_count++; +out: write_unlock_bh(&ksocknal_data.ksnd_global_lock); return 0; diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index 419a7a90bce0..f45bcbc63738 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -339,7 +339,7 @@ static void __ring_interrupt(struct tb_ring *ring) return; if (ring->start_poll) { - __ring_interrupt_mask(ring, false); + __ring_interrupt_mask(ring, true); ring->start_poll(ring->poll_data); } else { schedule_work(&ring->work); diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 427e0d5d8f13..539b49adb6af 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -1762,7 +1762,7 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old) { struct n_tty_data *ldata = tty->disc_data; - if (!old || (old->c_lflag ^ tty->termios.c_lflag) & ICANON) { + if (!old || (old->c_lflag ^ tty->termios.c_lflag) & (ICANON | EXTPROC)) { bitmap_zero(ldata->read_flags, N_TTY_BUF_SIZE); ldata->line_start = ldata->read_tail; if (!L_ICANON(tty) || !read_cnt(ldata)) { @@ -2425,7 +2425,7 @@ static int n_tty_ioctl(struct tty_struct *tty, struct file *file, return put_user(tty_chars_in_buffer(tty), (int __user *) arg); case TIOCINQ: down_write(&tty->termios_rwsem); - if (L_ICANON(tty)) + if (L_ICANON(tty) && !L_EXTPROC(tty)) retval = inq_canon(ldata); else retval = read_cnt(ldata); diff --git a/drivers/usb/chipidea/ci_hdrc_msm.c b/drivers/usb/chipidea/ci_hdrc_msm.c index 3593ce0ec641..880009987460 100644 --- a/drivers/usb/chipidea/ci_hdrc_msm.c +++ b/drivers/usb/chipidea/ci_hdrc_msm.c @@ -247,7 +247,7 @@ static int ci_hdrc_msm_probe(struct platform_device *pdev) if (ret) goto err_mux; - ulpi_node = of_find_node_by_name(of_node_get(pdev->dev.of_node), "ulpi"); + ulpi_node = of_get_child_by_name(pdev->dev.of_node, "ulpi"); if (ulpi_node) { phy_node = of_get_next_available_child(ulpi_node, NULL); ci->hsic = of_device_is_compatible(phy_node, "qcom,usb-hsic-phy"); diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 78e92d29f8d9..c821b4b9647e 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -1007,7 +1007,7 @@ int usb_get_bos_descriptor(struct usb_device *dev) case USB_SSP_CAP_TYPE: ssp_cap = (struct usb_ssp_cap_descriptor *)buffer; ssac = (le32_to_cpu(ssp_cap->bmAttributes) & - USB_SSP_SUBLINK_SPEED_ATTRIBS) + 1; + USB_SSP_SUBLINK_SPEED_ATTRIBS); if (length >= USB_DT_USB_SSP_CAP_SIZE(ssac)) dev->bos->ssp_cap = ssp_cap; break; diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index a10b346b9777..4024926c1d68 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -52,10 +52,11 @@ static const struct usb_device_id usb_quirk_list[] = { /* Microsoft LifeCam-VX700 v2.0 */ { USB_DEVICE(0x045e, 0x0770), .driver_info = USB_QUIRK_RESET_RESUME }, - /* Logitech HD Pro Webcams C920, C920-C and C930e */ + /* Logitech HD Pro Webcams C920, C920-C, C925e and C930e */ { USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT }, { USB_DEVICE(0x046d, 0x0841), .driver_info = USB_QUIRK_DELAY_INIT }, { USB_DEVICE(0x046d, 0x0843), .driver_info = USB_QUIRK_DELAY_INIT }, + { USB_DEVICE(0x046d, 0x085b), .driver_info = USB_QUIRK_DELAY_INIT }, /* Logitech ConferenceCam CC3000e */ { USB_DEVICE(0x046d, 0x0847), .driver_info = USB_QUIRK_DELAY_INIT }, @@ -149,6 +150,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* Genesys Logic hub, internally used by KY-688 USB 3.1 Type-C Hub */ { USB_DEVICE(0x05e3, 0x0612), .driver_info = USB_QUIRK_NO_LPM }, + /* ELSA MicroLink 56K */ + { USB_DEVICE(0x05cc, 0x2267), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Genesys Logic hub, internally used by Moshi USB to Ethernet Adapter */ { USB_DEVICE(0x05e3, 0x0616), .driver_info = USB_QUIRK_NO_LPM }, diff --git a/drivers/usb/host/xhci-debugfs.c b/drivers/usb/host/xhci-debugfs.c index 4f7895dbcf88..e26e685d8a57 100644 --- a/drivers/usb/host/xhci-debugfs.c +++ b/drivers/usb/host/xhci-debugfs.c @@ -162,7 +162,7 @@ static void xhci_debugfs_extcap_regset(struct xhci_hcd *xhci, int cap_id, static int xhci_ring_enqueue_show(struct seq_file *s, void *unused) { dma_addr_t dma; - struct xhci_ring *ring = s->private; + struct xhci_ring *ring = *(struct xhci_ring **)s->private; dma = xhci_trb_virt_to_dma(ring->enq_seg, ring->enqueue); seq_printf(s, "%pad\n", &dma); @@ -173,7 +173,7 @@ static int xhci_ring_enqueue_show(struct seq_file *s, void *unused) static int xhci_ring_dequeue_show(struct seq_file *s, void *unused) { dma_addr_t dma; - struct xhci_ring *ring = s->private; + struct xhci_ring *ring = *(struct xhci_ring **)s->private; dma = xhci_trb_virt_to_dma(ring->deq_seg, ring->dequeue); seq_printf(s, "%pad\n", &dma); @@ -183,7 +183,7 @@ static int xhci_ring_dequeue_show(struct seq_file *s, void *unused) static int xhci_ring_cycle_show(struct seq_file *s, void *unused) { - struct xhci_ring *ring = s->private; + struct xhci_ring *ring = *(struct xhci_ring **)s->private; seq_printf(s, "%d\n", ring->cycle_state); @@ -346,7 +346,7 @@ static void xhci_debugfs_create_files(struct xhci_hcd *xhci, } static struct dentry *xhci_debugfs_create_ring_dir(struct xhci_hcd *xhci, - struct xhci_ring *ring, + struct xhci_ring **ring, const char *name, struct dentry *parent) { @@ -387,7 +387,7 @@ void xhci_debugfs_create_endpoint(struct xhci_hcd *xhci, snprintf(epriv->name, sizeof(epriv->name), "ep%02d", ep_index); epriv->root = xhci_debugfs_create_ring_dir(xhci, - dev->eps[ep_index].new_ring, + &dev->eps[ep_index].new_ring, epriv->name, spriv->root); spriv->eps[ep_index] = epriv; @@ -423,7 +423,7 @@ void xhci_debugfs_create_slot(struct xhci_hcd *xhci, int slot_id) priv->dev = dev; dev->debugfs_private = priv; - xhci_debugfs_create_ring_dir(xhci, dev->eps[0].ring, + xhci_debugfs_create_ring_dir(xhci, &dev->eps[0].ring, "ep00", priv->root); xhci_debugfs_create_context_files(xhci, priv->root, slot_id); @@ -488,11 +488,11 @@ void xhci_debugfs_init(struct xhci_hcd *xhci) ARRAY_SIZE(xhci_extcap_dbc), "reg-ext-dbc"); - xhci_debugfs_create_ring_dir(xhci, xhci->cmd_ring, + xhci_debugfs_create_ring_dir(xhci, &xhci->cmd_ring, "command-ring", xhci->debugfs_root); - xhci_debugfs_create_ring_dir(xhci, xhci->event_ring, + xhci_debugfs_create_ring_dir(xhci, &xhci->event_ring, "event-ring", xhci->debugfs_root); diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 7ef1274ef7f7..1aad89b8aba0 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -178,6 +178,9 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) xhci->quirks |= XHCI_BROKEN_STREAMS; } if (pdev->vendor == PCI_VENDOR_ID_RENESAS && + pdev->device == 0x0014) + xhci->quirks |= XHCI_TRUST_TX_LENGTH; + if (pdev->vendor == PCI_VENDOR_ID_RENESAS && pdev->device == 0x0015) xhci->quirks |= XHCI_RESET_ON_RESUME; if (pdev->vendor == PCI_VENDOR_ID_VIA) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 2424d3020ca3..da6dbe3ebd8b 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -3525,8 +3525,6 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) struct xhci_slot_ctx *slot_ctx; int i, ret; - xhci_debugfs_remove_slot(xhci, udev->slot_id); - #ifndef CONFIG_USB_DEFAULT_PERSIST /* * We called pm_runtime_get_noresume when the device was attached. @@ -3555,8 +3553,10 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) } ret = xhci_disable_slot(xhci, udev->slot_id); - if (ret) + if (ret) { + xhci_debugfs_remove_slot(xhci, udev->slot_id); xhci_free_virt_device(xhci, udev->slot_id); + } } int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 1aba9105b369..fc68952c994a 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1013,6 +1013,7 @@ static const struct usb_device_id id_table_combined[] = { .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_BT_USB_PID) }, { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_WL_USB_PID) }, + { USB_DEVICE(AIRBUS_DS_VID, AIRBUS_DS_P8GR) }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 4faa09fe308c..8b4ecd2bd297 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -915,6 +915,12 @@ #define ICPDAS_I7563U_PID 0x0105 /* + * Airbus Defence and Space + */ +#define AIRBUS_DS_VID 0x1e8e /* Vendor ID */ +#define AIRBUS_DS_P8GR 0x6001 /* Tetra P8GR */ + +/* * RT Systems programming cables for various ham radios */ #define RTSYSTEMS_VID 0x2100 /* Vendor ID */ diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 3b3513874cfd..b6320e3be429 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -233,6 +233,8 @@ static void option_instat_callback(struct urb *urb); /* These Quectel products use Qualcomm's vendor ID */ #define QUECTEL_PRODUCT_UC20 0x9003 #define QUECTEL_PRODUCT_UC15 0x9090 +/* These Yuga products use Qualcomm's vendor ID */ +#define YUGA_PRODUCT_CLM920_NC5 0x9625 #define QUECTEL_VENDOR_ID 0x2c7c /* These Quectel products use Quectel's vendor ID */ @@ -280,6 +282,7 @@ static void option_instat_callback(struct urb *urb); #define TELIT_PRODUCT_LE922_USBCFG3 0x1043 #define TELIT_PRODUCT_LE922_USBCFG5 0x1045 #define TELIT_PRODUCT_ME910 0x1100 +#define TELIT_PRODUCT_ME910_DUAL_MODEM 0x1101 #define TELIT_PRODUCT_LE920 0x1200 #define TELIT_PRODUCT_LE910 0x1201 #define TELIT_PRODUCT_LE910_USBCFG4 0x1206 @@ -645,6 +648,11 @@ static const struct option_blacklist_info telit_me910_blacklist = { .reserved = BIT(1) | BIT(3), }; +static const struct option_blacklist_info telit_me910_dual_modem_blacklist = { + .sendsetup = BIT(0), + .reserved = BIT(3), +}; + static const struct option_blacklist_info telit_le910_blacklist = { .sendsetup = BIT(0), .reserved = BIT(1) | BIT(2), @@ -674,6 +682,10 @@ static const struct option_blacklist_info cinterion_rmnet2_blacklist = { .reserved = BIT(4) | BIT(5), }; +static const struct option_blacklist_info yuga_clm920_nc5_blacklist = { + .reserved = BIT(1) | BIT(4), +}; + static const struct usb_device_id option_ids[] = { { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) }, @@ -1178,6 +1190,9 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC15)}, { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC20), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + /* Yuga products use Qualcomm vendor ID */ + { USB_DEVICE(QUALCOMM_VENDOR_ID, YUGA_PRODUCT_CLM920_NC5), + .driver_info = (kernel_ulong_t)&yuga_clm920_nc5_blacklist }, /* Quectel products using Quectel vendor ID */ { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, @@ -1244,6 +1259,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), .driver_info = (kernel_ulong_t)&telit_me910_blacklist }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), + .driver_info = (kernel_ulong_t)&telit_me910_dual_modem_blacklist }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910), .driver_info = (kernel_ulong_t)&telit_le910_blacklist }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4), diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index e3892541a489..613f91add03d 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -162,6 +162,8 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x1199, 0x9079)}, /* Sierra Wireless EM74xx */ {DEVICE_SWI(0x1199, 0x907a)}, /* Sierra Wireless EM74xx QDL */ {DEVICE_SWI(0x1199, 0x907b)}, /* Sierra Wireless EM74xx */ + {DEVICE_SWI(0x1199, 0x9090)}, /* Sierra Wireless EM7565 QDL */ + {DEVICE_SWI(0x1199, 0x9091)}, /* Sierra Wireless EM7565 */ {DEVICE_SWI(0x413c, 0x81a2)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a3)}, /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a4)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */ @@ -342,6 +344,7 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id) break; case 2: dev_dbg(dev, "NMEA GPS interface found\n"); + sendsetup = true; break; case 3: dev_dbg(dev, "Modem port found\n"); diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c index a3df8ee82faf..e31a6f204397 100644 --- a/drivers/usb/usbip/stub_dev.c +++ b/drivers/usb/usbip/stub_dev.c @@ -149,8 +149,7 @@ static void stub_shutdown_connection(struct usbip_device *ud) * step 1? */ if (ud->tcp_socket) { - dev_dbg(&sdev->udev->dev, "shutdown tcp_socket %p\n", - ud->tcp_socket); + dev_dbg(&sdev->udev->dev, "shutdown sockfd %d\n", ud->sockfd); kernel_sock_shutdown(ud->tcp_socket, SHUT_RDWR); } diff --git a/drivers/usb/usbip/stub_main.c b/drivers/usb/usbip/stub_main.c index 4f48b306713f..c31c8402a0c5 100644 --- a/drivers/usb/usbip/stub_main.c +++ b/drivers/usb/usbip/stub_main.c @@ -237,11 +237,12 @@ void stub_device_cleanup_urbs(struct stub_device *sdev) struct stub_priv *priv; struct urb *urb; - dev_dbg(&sdev->udev->dev, "free sdev %p\n", sdev); + dev_dbg(&sdev->udev->dev, "Stub device cleaning up urbs\n"); while ((priv = stub_priv_pop(sdev))) { urb = priv->urb; - dev_dbg(&sdev->udev->dev, "free urb %p\n", urb); + dev_dbg(&sdev->udev->dev, "free urb seqnum %lu\n", + priv->seqnum); usb_kill_urb(urb); kmem_cache_free(stub_priv_cache, priv); diff --git a/drivers/usb/usbip/stub_rx.c b/drivers/usb/usbip/stub_rx.c index 493ac2928391..6c5a59313999 100644 --- a/drivers/usb/usbip/stub_rx.c +++ b/drivers/usb/usbip/stub_rx.c @@ -211,9 +211,6 @@ static int stub_recv_cmd_unlink(struct stub_device *sdev, if (priv->seqnum != pdu->u.cmd_unlink.seqnum) continue; - dev_info(&priv->urb->dev->dev, "unlink urb %p\n", - priv->urb); - /* * This matched urb is not completed yet (i.e., be in * flight in usb hcd hardware/driver). Now we are @@ -252,8 +249,8 @@ static int stub_recv_cmd_unlink(struct stub_device *sdev, ret = usb_unlink_urb(priv->urb); if (ret != -EINPROGRESS) dev_err(&priv->urb->dev->dev, - "failed to unlink a urb %p, ret %d\n", - priv->urb, ret); + "failed to unlink a urb # %lu, ret %d\n", + priv->seqnum, ret); return 0; } @@ -342,14 +339,6 @@ static int get_pipe(struct stub_device *sdev, struct usbip_header *pdu) epd = &ep->desc; - /* validate transfer_buffer_length */ - if (pdu->u.cmd_submit.transfer_buffer_length > INT_MAX) { - dev_err(&sdev->udev->dev, - "CMD_SUBMIT: -EMSGSIZE transfer_buffer_length %d\n", - pdu->u.cmd_submit.transfer_buffer_length); - return -1; - } - if (usb_endpoint_xfer_control(epd)) { if (dir == USBIP_DIR_OUT) return usb_sndctrlpipe(udev, epnum); @@ -482,8 +471,7 @@ static void stub_recv_cmd_submit(struct stub_device *sdev, } /* allocate urb transfer buffer, if needed */ - if (pdu->u.cmd_submit.transfer_buffer_length > 0 && - pdu->u.cmd_submit.transfer_buffer_length <= INT_MAX) { + if (pdu->u.cmd_submit.transfer_buffer_length > 0) { priv->urb->transfer_buffer = kzalloc(pdu->u.cmd_submit.transfer_buffer_length, GFP_KERNEL); diff --git a/drivers/usb/usbip/stub_tx.c b/drivers/usb/usbip/stub_tx.c index 53172b1f6257..f0ec41a50cbc 100644 --- a/drivers/usb/usbip/stub_tx.c +++ b/drivers/usb/usbip/stub_tx.c @@ -88,7 +88,7 @@ void stub_complete(struct urb *urb) /* link a urb to the queue of tx. */ spin_lock_irqsave(&sdev->priv_lock, flags); if (sdev->ud.tcp_socket == NULL) { - usbip_dbg_stub_tx("ignore urb for closed connection %p", urb); + usbip_dbg_stub_tx("ignore urb for closed connection\n"); /* It will be freed in stub_device_cleanup_urbs(). */ } else if (priv->unlinking) { stub_enqueue_ret_unlink(sdev, priv->seqnum, urb->status); @@ -190,8 +190,8 @@ static int stub_send_ret_submit(struct stub_device *sdev) /* 1. setup usbip_header */ setup_ret_submit_pdu(&pdu_header, urb); - usbip_dbg_stub_tx("setup txdata seqnum: %d urb: %p\n", - pdu_header.base.seqnum, urb); + usbip_dbg_stub_tx("setup txdata seqnum: %d\n", + pdu_header.base.seqnum); usbip_header_correct_endian(&pdu_header, 1); iov[iovnum].iov_base = &pdu_header; diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c index f7978933b402..7b219d9109b4 100644 --- a/drivers/usb/usbip/usbip_common.c +++ b/drivers/usb/usbip/usbip_common.c @@ -317,26 +317,20 @@ int usbip_recv(struct socket *sock, void *buf, int size) struct msghdr msg = {.msg_flags = MSG_NOSIGNAL}; int total = 0; + if (!sock || !buf || !size) + return -EINVAL; + iov_iter_kvec(&msg.msg_iter, READ|ITER_KVEC, &iov, 1, size); usbip_dbg_xmit("enter\n"); - if (!sock || !buf || !size) { - pr_err("invalid arg, sock %p buff %p size %d\n", sock, buf, - size); - return -EINVAL; - } - do { - int sz = msg_data_left(&msg); + msg_data_left(&msg); sock->sk->sk_allocation = GFP_NOIO; result = sock_recvmsg(sock, &msg, MSG_WAITALL); - if (result <= 0) { - pr_debug("receive sock %p buf %p size %u ret %d total %d\n", - sock, buf + total, sz, result, total); + if (result <= 0) goto err; - } total += result; } while (msg_data_left(&msg)); diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index 6b3278c4b72a..c3e1008aa491 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -656,9 +656,6 @@ static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag struct vhci_device *vdev; unsigned long flags; - usbip_dbg_vhci_hc("enter, usb_hcd %p urb %p mem_flags %d\n", - hcd, urb, mem_flags); - if (portnum > VHCI_HC_PORTS) { pr_err("invalid port number %d\n", portnum); return -ENODEV; @@ -822,8 +819,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) struct vhci_device *vdev; unsigned long flags; - pr_info("dequeue a urb %p\n", urb); - spin_lock_irqsave(&vhci->lock, flags); priv = urb->hcpriv; @@ -851,7 +846,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) /* tcp connection is closed */ spin_lock(&vdev->priv_lock); - pr_info("device %p seems to be disconnected\n", vdev); list_del(&priv->list); kfree(priv); urb->hcpriv = NULL; @@ -863,8 +857,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) * vhci_rx will receive RET_UNLINK and give back the URB. * Otherwise, we give back it here. */ - pr_info("gives back urb %p\n", urb); - usb_hcd_unlink_urb_from_ep(hcd, urb); spin_unlock_irqrestore(&vhci->lock, flags); @@ -892,8 +884,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) unlink->unlink_seqnum = priv->seqnum; - pr_info("device %p seems to be still connected\n", vdev); - /* send cmd_unlink and try to cancel the pending URB in the * peer */ list_add_tail(&unlink->list, &vdev->unlink_tx); @@ -975,7 +965,7 @@ static void vhci_shutdown_connection(struct usbip_device *ud) /* need this? see stub_dev.c */ if (ud->tcp_socket) { - pr_debug("shutdown tcp_socket %p\n", ud->tcp_socket); + pr_debug("shutdown tcp_socket %d\n", ud->sockfd); kernel_sock_shutdown(ud->tcp_socket, SHUT_RDWR); } diff --git a/drivers/usb/usbip/vhci_rx.c b/drivers/usb/usbip/vhci_rx.c index 90577e8b2282..112ebb90d8c9 100644 --- a/drivers/usb/usbip/vhci_rx.c +++ b/drivers/usb/usbip/vhci_rx.c @@ -23,24 +23,23 @@ struct urb *pickup_urb_and_free_priv(struct vhci_device *vdev, __u32 seqnum) urb = priv->urb; status = urb->status; - usbip_dbg_vhci_rx("find urb %p vurb %p seqnum %u\n", - urb, priv, seqnum); + usbip_dbg_vhci_rx("find urb seqnum %u\n", seqnum); switch (status) { case -ENOENT: /* fall through */ case -ECONNRESET: - dev_info(&urb->dev->dev, - "urb %p was unlinked %ssynchronuously.\n", urb, - status == -ENOENT ? "" : "a"); + dev_dbg(&urb->dev->dev, + "urb seq# %u was unlinked %ssynchronuously\n", + seqnum, status == -ENOENT ? "" : "a"); break; case -EINPROGRESS: /* no info output */ break; default: - dev_info(&urb->dev->dev, - "urb %p may be in a error, status %d\n", urb, - status); + dev_dbg(&urb->dev->dev, + "urb seq# %u may be in a error, status %d\n", + seqnum, status); } list_del(&priv->list); @@ -67,8 +66,8 @@ static void vhci_recv_ret_submit(struct vhci_device *vdev, spin_unlock_irqrestore(&vdev->priv_lock, flags); if (!urb) { - pr_err("cannot find a urb of seqnum %u\n", pdu->base.seqnum); - pr_info("max seqnum %d\n", + pr_err("cannot find a urb of seqnum %u max seqnum %d\n", + pdu->base.seqnum, atomic_read(&vhci_hcd->seqnum)); usbip_event_add(ud, VDEV_EVENT_ERROR_TCP); return; @@ -91,7 +90,7 @@ static void vhci_recv_ret_submit(struct vhci_device *vdev, if (usbip_dbg_flag_vhci_rx) usbip_dump_urb(urb); - usbip_dbg_vhci_rx("now giveback urb %p\n", urb); + usbip_dbg_vhci_rx("now giveback urb %u\n", pdu->base.seqnum); spin_lock_irqsave(&vhci->lock, flags); usb_hcd_unlink_urb_from_ep(vhci_hcd_to_hcd(vhci_hcd), urb); @@ -158,7 +157,7 @@ static void vhci_recv_ret_unlink(struct vhci_device *vdev, pr_info("the urb (seqnum %d) was already given back\n", pdu->base.seqnum); } else { - usbip_dbg_vhci_rx("now giveback urb %p\n", urb); + usbip_dbg_vhci_rx("now giveback urb %d\n", pdu->base.seqnum); /* If unlink is successful, status is -ECONNRESET */ urb->status = pdu->u.ret_unlink.status; diff --git a/drivers/usb/usbip/vhci_tx.c b/drivers/usb/usbip/vhci_tx.c index d625a2ff4b71..9aed15a358b7 100644 --- a/drivers/usb/usbip/vhci_tx.c +++ b/drivers/usb/usbip/vhci_tx.c @@ -69,7 +69,8 @@ static int vhci_send_cmd_submit(struct vhci_device *vdev) memset(&msg, 0, sizeof(msg)); memset(&iov, 0, sizeof(iov)); - usbip_dbg_vhci_tx("setup txdata urb %p\n", urb); + usbip_dbg_vhci_tx("setup txdata urb seqnum %lu\n", + priv->seqnum); /* 1. setup usbip_header */ setup_cmd_submit_pdu(&pdu_header, urb); diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c index d1e1d8d2b9d5..4c789e61554b 100644 --- a/drivers/xen/pvcalls-front.c +++ b/drivers/xen/pvcalls-front.c @@ -805,7 +805,7 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags) pvcalls_exit(); return ret; } - map2 = kzalloc(sizeof(*map2), GFP_KERNEL); + map2 = kzalloc(sizeof(*map2), GFP_ATOMIC); if (map2 == NULL) { clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT, (void *)&map->passive.flags); diff --git a/fs/afs/dir.c b/fs/afs/dir.c index ff8d5bf4354f..23c7f395d718 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -895,20 +895,38 @@ error: * However, if we didn't have a callback promise outstanding, or it was * outstanding on a different server, then it won't break it either... */ -static int afs_dir_remove_link(struct dentry *dentry, struct key *key) +static int afs_dir_remove_link(struct dentry *dentry, struct key *key, + unsigned long d_version_before, + unsigned long d_version_after) { + bool dir_valid; int ret = 0; + /* There were no intervening changes on the server if the version + * number we got back was incremented by exactly 1. + */ + dir_valid = (d_version_after == d_version_before + 1); + if (d_really_is_positive(dentry)) { struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); - if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) - kdebug("AFS_VNODE_DELETED"); - clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); - - ret = afs_validate(vnode, key); - if (ret == -ESTALE) + if (dir_valid) { + drop_nlink(&vnode->vfs_inode); + if (vnode->vfs_inode.i_nlink == 0) { + set_bit(AFS_VNODE_DELETED, &vnode->flags); + clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + } ret = 0; + } else { + clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + + if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) + kdebug("AFS_VNODE_DELETED"); + + ret = afs_validate(vnode, key); + if (ret == -ESTALE) + ret = 0; + } _debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, ret); } @@ -923,6 +941,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) struct afs_fs_cursor fc; struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode; struct key *key; + unsigned long d_version = (unsigned long)dentry->d_fsdata; int ret; _enter("{%x:%u},{%pd}", @@ -955,7 +974,9 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) afs_vnode_commit_status(&fc, dvnode, fc.cb_break); ret = afs_end_vnode_operation(&fc); if (ret == 0) - ret = afs_dir_remove_link(dentry, key); + ret = afs_dir_remove_link( + dentry, key, d_version, + (unsigned long)dvnode->status.data_version); } error_key: diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 3415eb7484f6..1e81864ef0b2 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -377,6 +377,10 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) } read_sequnlock_excl(&vnode->cb_lock); + + if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) + clear_nlink(&vnode->vfs_inode); + if (valid) goto valid; diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index ea1460b9b71a..e1126659f043 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -885,7 +885,7 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count, { struct afs_net *net = call->net; enum afs_call_state state; - u32 remote_abort; + u32 remote_abort = 0; int ret; _enter("{%s,%zu},,%zu,%d", diff --git a/fs/afs/write.c b/fs/afs/write.c index cb5f8a3df577..9370e2feb999 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -198,7 +198,7 @@ int afs_write_end(struct file *file, struct address_space *mapping, ret = afs_fill_page(vnode, key, pos + copied, len - copied, page); if (ret < 0) - return ret; + goto out; } SetPageUptodate(page); } @@ -206,10 +206,12 @@ int afs_write_end(struct file *file, struct address_space *mapping, set_page_dirty(page); if (PageDirty(page)) _debug("dirtied"); + ret = copied; + +out: unlock_page(page); put_page(page); - - return copied; + return ret; } /* diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 5d73f79ded8b..056276101c63 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -87,6 +87,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node( spin_lock(&root->inode_lock); node = radix_tree_lookup(&root->delayed_nodes_tree, ino); + if (node) { if (btrfs_inode->delayed_node) { refcount_inc(&node->refs); /* can be accessed */ @@ -94,9 +95,30 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node( spin_unlock(&root->inode_lock); return node; } - btrfs_inode->delayed_node = node; - /* can be accessed and cached in the inode */ - refcount_add(2, &node->refs); + + /* + * It's possible that we're racing into the middle of removing + * this node from the radix tree. In this case, the refcount + * was zero and it should never go back to one. Just return + * NULL like it was never in the radix at all; our release + * function is in the process of removing it. + * + * Some implementations of refcount_inc refuse to bump the + * refcount once it has hit zero. If we don't do this dance + * here, refcount_inc() may decide to just WARN_ONCE() instead + * of actually bumping the refcount. + * + * If this node is properly in the radix, we want to bump the + * refcount twice, once for the inode and once for this get + * operation. + */ + if (refcount_inc_not_zero(&node->refs)) { + refcount_inc(&node->refs); + btrfs_inode->delayed_node = node; + } else { + node = NULL; + } + spin_unlock(&root->inode_lock); return node; } @@ -254,17 +276,18 @@ static void __btrfs_release_delayed_node( mutex_unlock(&delayed_node->mutex); if (refcount_dec_and_test(&delayed_node->refs)) { - bool free = false; struct btrfs_root *root = delayed_node->root; + spin_lock(&root->inode_lock); - if (refcount_read(&delayed_node->refs) == 0) { - radix_tree_delete(&root->delayed_nodes_tree, - delayed_node->inode_id); - free = true; - } + /* + * Once our refcount goes to zero, nobody is allowed to bump it + * back up. We can delete it now. + */ + ASSERT(refcount_read(&delayed_node->refs) == 0); + radix_tree_delete(&root->delayed_nodes_tree, + delayed_node->inode_id); spin_unlock(&root->inode_lock); - if (free) - kmem_cache_free(delayed_node_cache, delayed_node); + kmem_cache_free(delayed_node_cache, delayed_node); } } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 49810b70afd3..a25684287501 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -237,7 +237,6 @@ static struct btrfs_device *__alloc_device(void) kfree(dev); return ERR_PTR(-ENOMEM); } - bio_get(dev->flush_bio); INIT_LIST_HEAD(&dev->dev_list); INIT_LIST_HEAD(&dev->dev_alloc_list); diff --git a/fs/exec.c b/fs/exec.c index 5688b5e1b937..7eb8d21bcab9 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1349,9 +1349,14 @@ void setup_new_exec(struct linux_binprm * bprm) current->sas_ss_sp = current->sas_ss_size = 0; - /* Figure out dumpability. */ + /* + * Figure out dumpability. Note that this checking only of current + * is wrong, but userspace depends on it. This should be testing + * bprm->secureexec instead. + */ if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP || - bprm->secureexec) + !(uid_eq(current_euid(), current_uid()) && + gid_eq(current_egid(), current_gid()))) set_dumpable(current->mm, suid_dumpable); else set_dumpable(current->mm, SUID_DUMP_USER); diff --git a/fs/super.c b/fs/super.c index 7ff1349609e4..06bd25d90ba5 100644 --- a/fs/super.c +++ b/fs/super.c @@ -517,7 +517,11 @@ retry: hlist_add_head(&s->s_instances, &type->fs_supers); spin_unlock(&sb_lock); get_filesystem(type); - register_shrinker(&s->s_shrink); + err = register_shrinker(&s->s_shrink); + if (err) { + deactivate_locked_super(s); + s = ERR_PTR(err); + } return s; } diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index ac9a4e65ca49..41a75f9f23fd 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -570,11 +570,14 @@ out: static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, struct userfaultfd_wait_queue *ewq) { + struct userfaultfd_ctx *release_new_ctx; + if (WARN_ON_ONCE(current->flags & PF_EXITING)) goto out; ewq->ctx = ctx; init_waitqueue_entry(&ewq->wq, current); + release_new_ctx = NULL; spin_lock(&ctx->event_wqh.lock); /* @@ -601,8 +604,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, new = (struct userfaultfd_ctx *) (unsigned long) ewq->msg.arg.reserved.reserved1; - - userfaultfd_ctx_put(new); + release_new_ctx = new; } break; } @@ -617,6 +619,20 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, __set_current_state(TASK_RUNNING); spin_unlock(&ctx->event_wqh.lock); + if (release_new_ctx) { + struct vm_area_struct *vma; + struct mm_struct *mm = release_new_ctx->mm; + + /* the various vma->vm_userfaultfd_ctx still points to it */ + down_write(&mm->mmap_sem); + for (vma = mm->mmap; vma; vma = vma->vm_next) + if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) + vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; + up_write(&mm->mmap_sem); + + userfaultfd_ctx_put(release_new_ctx); + } + /* * ctx may go away after this if the userfault pseudo fd is * already released. diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 21e2d70884e1..4fc526a27a94 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -399,7 +399,7 @@ xfs_map_blocks( (ip->i_df.if_flags & XFS_IFEXTENTS)); ASSERT(offset <= mp->m_super->s_maxbytes); - if ((xfs_ufsize_t)offset + count > mp->m_super->s_maxbytes) + if (offset > mp->m_super->s_maxbytes - count) count = mp->m_super->s_maxbytes - offset; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); offset_fsb = XFS_B_TO_FSBT(mp, offset); @@ -1312,7 +1312,7 @@ xfs_get_blocks( lockmode = xfs_ilock_data_map_shared(ip); ASSERT(offset <= mp->m_super->s_maxbytes); - if ((xfs_ufsize_t)offset + size > mp->m_super->s_maxbytes) + if (offset > mp->m_super->s_maxbytes - size) size = mp->m_super->s_maxbytes - offset; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); offset_fsb = XFS_B_TO_FSBT(mp, offset); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 7ab52a8bc0a9..66e1edbfb2b2 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1006,7 +1006,7 @@ xfs_file_iomap_begin( } ASSERT(offset <= mp->m_super->s_maxbytes); - if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes) + if (offset > mp->m_super->s_maxbytes - length) length = mp->m_super->s_maxbytes - offset; offset_fsb = XFS_B_TO_FSBT(mp, offset); end_fsb = XFS_B_TO_FSB(mp, offset + length); diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index ec952dfad359..b897b11afb2c 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -48,7 +48,7 @@ STATIC int xfs_qm_init_quotainos(xfs_mount_t *); STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); - +STATIC void xfs_qm_destroy_quotainos(xfs_quotainfo_t *qi); STATIC void xfs_qm_dqfree_one(struct xfs_dquot *dqp); /* * We use the batch lookup interface to iterate over the dquots as it @@ -695,9 +695,17 @@ xfs_qm_init_quotainfo( qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan; qinf->qi_shrinker.seeks = DEFAULT_SEEKS; qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE; - register_shrinker(&qinf->qi_shrinker); + + error = register_shrinker(&qinf->qi_shrinker); + if (error) + goto out_free_inos; + return 0; +out_free_inos: + mutex_destroy(&qinf->qi_quotaofflock); + mutex_destroy(&qinf->qi_tree_lock); + xfs_qm_destroy_quotainos(qinf); out_free_lru: list_lru_destroy(&qinf->qi_lru); out_free_qinf: @@ -706,7 +714,6 @@ out_free_qinf: return error; } - /* * Gets called when unmounting a filesystem or when all quotas get * turned off. @@ -723,19 +730,8 @@ xfs_qm_destroy_quotainfo( unregister_shrinker(&qi->qi_shrinker); list_lru_destroy(&qi->qi_lru); - - if (qi->qi_uquotaip) { - IRELE(qi->qi_uquotaip); - qi->qi_uquotaip = NULL; /* paranoia */ - } - if (qi->qi_gquotaip) { - IRELE(qi->qi_gquotaip); - qi->qi_gquotaip = NULL; - } - if (qi->qi_pquotaip) { - IRELE(qi->qi_pquotaip); - qi->qi_pquotaip = NULL; - } + xfs_qm_destroy_quotainos(qi); + mutex_destroy(&qi->qi_tree_lock); mutex_destroy(&qi->qi_quotaofflock); kmem_free(qi); mp->m_quotainfo = NULL; @@ -1600,6 +1596,24 @@ error_rele: } STATIC void +xfs_qm_destroy_quotainos( + xfs_quotainfo_t *qi) +{ + if (qi->qi_uquotaip) { + IRELE(qi->qi_uquotaip); + qi->qi_uquotaip = NULL; /* paranoia */ + } + if (qi->qi_gquotaip) { + IRELE(qi->qi_gquotaip); + qi->qi_gquotaip = NULL; + } + if (qi->qi_pquotaip) { + IRELE(qi->qi_pquotaip); + qi->qi_pquotaip = NULL; + } +} + +STATIC void xfs_qm_dqfree_one( struct xfs_dquot *dqp) { diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index 38d9c5861ed8..f38227a78eae 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h @@ -18,6 +18,7 @@ #include <linux/if_alg.h> #include <linux/scatterlist.h> #include <linux/types.h> +#include <linux/atomic.h> #include <net/sock.h> #include <crypto/aead.h> @@ -150,7 +151,7 @@ struct af_alg_ctx { struct crypto_wait wait; size_t used; - size_t rcvused; + atomic_t rcvused; bool more; bool merge; @@ -215,7 +216,7 @@ static inline int af_alg_rcvbuf(struct sock *sk) struct af_alg_ctx *ctx = ask->private; return max_t(int, max_t(int, sk->sk_rcvbuf & PAGE_MASK, PAGE_SIZE) - - ctx->rcvused, 0); + atomic_read(&ctx->rcvused), 0); } /** diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 9e03046d1df2..6be837c063c3 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -434,6 +434,8 @@ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr) attr->numa_node : NUMA_NO_NODE; } +struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type); + #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -521,6 +523,12 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, { return 0; } + +static inline struct bpf_prog *bpf_prog_get_type_path(const char *name, + enum bpf_prog_type type) +{ + return ERR_PTR(-EOPNOTSUPP); +} #endif /* CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, @@ -529,6 +537,8 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, return bpf_prog_get_type_dev(ufd, type, false); } +bool bpf_prog_get_ok(struct bpf_prog *, enum bpf_prog_type *, bool); + int bpf_prog_offload_compile(struct bpf_prog *prog); void bpf_prog_offload_destroy(struct bpf_prog *prog); int bpf_prog_offload_info_fill(struct bpf_prog_info *info, diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 201ab7267986..1a32e558eb11 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -86,7 +86,7 @@ enum cpuhp_state { CPUHP_MM_ZSWP_POOL_PREPARE, CPUHP_KVM_PPC_BOOK3S_PREPARE, CPUHP_ZCOMP_PREPARE, - CPUHP_TIMERS_DEAD, + CPUHP_TIMERS_PREPARE, CPUHP_MIPS_SOC_PREPARE, CPUHP_BP_PREPARE_DYN, CPUHP_BP_PREPARE_DYN_END = CPUHP_BP_PREPARE_DYN + 20, diff --git a/include/linux/efi.h b/include/linux/efi.h index d813f7b04da7..29fdf8029cf6 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -140,11 +140,13 @@ struct efi_boot_memmap { struct capsule_info { efi_capsule_header_t header; + efi_capsule_header_t *capsule; int reset_type; long index; size_t count; size_t total_size; - phys_addr_t *pages; + struct page **pages; + phys_addr_t *phys; size_t page_bytes_remain; }; diff --git a/include/linux/fscache.h b/include/linux/fscache.h index f4ff47d4a893..fe0c349684fa 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -755,7 +755,7 @@ bool fscache_maybe_release_page(struct fscache_cookie *cookie, { if (fscache_cookie_valid(cookie) && PageFsCache(page)) return __fscache_maybe_release_page(cookie, page, gfp); - return false; + return true; } /** diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 55e672592fa9..7258cd676df4 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -66,9 +66,10 @@ struct gpio_irq_chip { /** * @lock_key: * - * Per GPIO IRQ chip lockdep class. + * Per GPIO IRQ chip lockdep classes. */ struct lock_class_key *lock_key; + struct lock_class_key *request_key; /** * @parent_handler: @@ -323,7 +324,8 @@ extern const char *gpiochip_is_requested(struct gpio_chip *chip, /* add/remove chips */ extern int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, - struct lock_class_key *lock_key); + struct lock_class_key *lock_key, + struct lock_class_key *request_key); /** * gpiochip_add_data() - register a gpio_chip @@ -350,11 +352,13 @@ extern int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, */ #ifdef CONFIG_LOCKDEP #define gpiochip_add_data(chip, data) ({ \ - static struct lock_class_key key; \ - gpiochip_add_data_with_key(chip, data, &key); \ + static struct lock_class_key lock_key; \ + static struct lock_class_key request_key; \ + gpiochip_add_data_with_key(chip, data, &lock_key, \ + &request_key); \ }) #else -#define gpiochip_add_data(chip, data) gpiochip_add_data_with_key(chip, data, NULL) +#define gpiochip_add_data(chip, data) gpiochip_add_data_with_key(chip, data, NULL, NULL) #endif static inline int gpiochip_add(struct gpio_chip *chip) @@ -429,7 +433,8 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip, irq_flow_handler_t handler, unsigned int type, bool threaded, - struct lock_class_key *lock_key); + struct lock_class_key *lock_key, + struct lock_class_key *request_key); #ifdef CONFIG_LOCKDEP @@ -445,10 +450,12 @@ static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip, irq_flow_handler_t handler, unsigned int type) { - static struct lock_class_key key; + static struct lock_class_key lock_key; + static struct lock_class_key request_key; return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, - handler, type, false, &key); + handler, type, false, + &lock_key, &request_key); } static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, @@ -458,10 +465,12 @@ static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, unsigned int type) { - static struct lock_class_key key; + static struct lock_class_key lock_key; + static struct lock_class_key request_key; return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, - handler, type, true, &key); + handler, type, true, + &lock_key, &request_key); } #else static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip, @@ -471,7 +480,7 @@ static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip, unsigned int type) { return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, - handler, type, false, NULL); + handler, type, false, NULL, NULL); } static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, @@ -481,7 +490,7 @@ static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, unsigned int type) { return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, - handler, type, true, NULL); + handler, type, true, NULL, NULL); } #endif /* CONFIG_LOCKDEP */ diff --git a/include/linux/irq.h b/include/linux/irq.h index e140f69163b6..a0231e96a578 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -212,6 +212,7 @@ struct irq_data { * mask. Applies only to affinity managed irqs. * IRQD_SINGLE_TARGET - IRQ allows only a single affinity target * IRQD_DEFAULT_TRIGGER_SET - Expected trigger already been set + * IRQD_CAN_RESERVE - Can use reservation mode */ enum { IRQD_TRIGGER_MASK = 0xf, @@ -233,6 +234,7 @@ enum { IRQD_MANAGED_SHUTDOWN = (1 << 23), IRQD_SINGLE_TARGET = (1 << 24), IRQD_DEFAULT_TRIGGER_SET = (1 << 25), + IRQD_CAN_RESERVE = (1 << 26), }; #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) @@ -377,6 +379,21 @@ static inline bool irqd_is_managed_and_shutdown(struct irq_data *d) return __irqd_to_state(d) & IRQD_MANAGED_SHUTDOWN; } +static inline void irqd_set_can_reserve(struct irq_data *d) +{ + __irqd_to_state(d) |= IRQD_CAN_RESERVE; +} + +static inline void irqd_clr_can_reserve(struct irq_data *d) +{ + __irqd_to_state(d) &= ~IRQD_CAN_RESERVE; +} + +static inline bool irqd_can_reserve(struct irq_data *d) +{ + return __irqd_to_state(d) & IRQD_CAN_RESERVE; +} + #undef __irqd_to_state static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 39fb3700f7a9..25b33b664537 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -255,12 +255,15 @@ static inline bool irq_is_percpu_devid(unsigned int irq) } static inline void -irq_set_lockdep_class(unsigned int irq, struct lock_class_key *class) +irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class, + struct lock_class_key *request_class) { struct irq_desc *desc = irq_to_desc(irq); - if (desc) - lockdep_set_class(&desc->lock, class); + if (desc) { + lockdep_set_class(&desc->lock, lock_class); + lockdep_set_class(&desc->request_mutex, request_class); + } } #ifdef CONFIG_IRQ_PREFLOW_FASTEOI diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index a34355d19546..48c7e86bb556 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -113,7 +113,7 @@ struct irq_domain_ops { unsigned int nr_irqs, void *arg); void (*free)(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs); - int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool early); + int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool reserve); void (*deactivate)(struct irq_domain *d, struct irq_data *irq_data); int (*translate)(struct irq_domain *d, struct irq_fwspec *fwspec, unsigned long *out_hwirq, unsigned int *out_type); diff --git a/include/linux/pti.h b/include/linux/pti.h new file mode 100644 index 000000000000..0174883a935a --- /dev/null +++ b/include/linux/pti.h @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef _INCLUDE_PTI_H +#define _INCLUDE_PTI_H + +#ifdef CONFIG_PAGE_TABLE_ISOLATION +#include <asm/pti.h> +#else +static inline void pti_init(void) { } +#endif + +#endif diff --git a/include/linux/sh_eth.h b/include/linux/sh_eth.h index dd5d69323701..6dfda97a6c1a 100644 --- a/include/linux/sh_eth.h +++ b/include/linux/sh_eth.h @@ -14,7 +14,6 @@ struct sh_eth_plat_data { unsigned char mac_addr[ETH_ALEN]; unsigned no_ether_link:1; unsigned ether_link_active_low:1; - unsigned needs_init:1; }; #endif diff --git a/include/linux/tick.h b/include/linux/tick.h index f442d1a42025..7cc35921218e 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -119,6 +119,7 @@ extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); extern ktime_t tick_nohz_get_sleep_length(void); extern unsigned long tick_nohz_get_idle_calls(void); +extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); #else /* !CONFIG_NO_HZ_COMMON */ diff --git a/include/linux/timer.h b/include/linux/timer.h index 04af640ea95b..2448f9cc48a3 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -207,9 +207,11 @@ unsigned long round_jiffies_up(unsigned long j); unsigned long round_jiffies_up_relative(unsigned long j); #ifdef CONFIG_HOTPLUG_CPU +int timers_prepare_cpu(unsigned int cpu); int timers_dead_cpu(unsigned int cpu); #else -#define timers_dead_cpu NULL +#define timers_prepare_cpu NULL +#define timers_dead_cpu NULL #endif #endif diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 8ac4d5cdbfed..02369e379d35 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -993,7 +993,7 @@ void sctp_transport_burst_limited(struct sctp_transport *); void sctp_transport_burst_reset(struct sctp_transport *); unsigned long sctp_transport_timeout(struct sctp_transport *); void sctp_transport_reset(struct sctp_transport *t); -void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu); +bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu); void sctp_transport_immediate_rtx(struct sctp_transport *); void sctp_transport_dst_release(struct sctp_transport *t); void sctp_transport_dst_confirm(struct sctp_transport *t); diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 13223396dc64..f96391e84a8a 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -146,7 +146,7 @@ struct vxlanhdr_gpe { np_applied:1, instance_applied:1, version:2, -reserved_flags2:2; + reserved_flags2:2; #elif defined(__BIG_ENDIAN_BITFIELD) u8 reserved_flags2:2, version:2, diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 87b7529fcdfe..f8cb5760ea4f 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -23,6 +23,7 @@ #define _UAPI_LINUX_IF_ETHER_H #include <linux/types.h> +#include <linux/libc-compat.h> /* * IEEE 802.3 Ethernet magic constants. The frame sizes omit the preamble @@ -150,11 +151,13 @@ * This is an Ethernet frame header. */ +#if __UAPI_DEF_ETHHDR struct ethhdr { unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ unsigned char h_source[ETH_ALEN]; /* source ether addr */ __be16 h_proto; /* packet type ID field */ } __attribute__((packed)); +#endif #endif /* _UAPI_LINUX_IF_ETHER_H */ diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h index 282875cf8056..fc29efaa918c 100644 --- a/include/uapi/linux/libc-compat.h +++ b/include/uapi/linux/libc-compat.h @@ -168,47 +168,106 @@ /* If we did not see any headers from any supported C libraries, * or we are being included in the kernel, then define everything - * that we need. */ + * that we need. Check for previous __UAPI_* definitions to give + * unsupported C libraries a way to opt out of any kernel definition. */ #else /* !defined(__GLIBC__) */ /* Definitions for if.h */ +#ifndef __UAPI_DEF_IF_IFCONF #define __UAPI_DEF_IF_IFCONF 1 +#endif +#ifndef __UAPI_DEF_IF_IFMAP #define __UAPI_DEF_IF_IFMAP 1 +#endif +#ifndef __UAPI_DEF_IF_IFNAMSIZ #define __UAPI_DEF_IF_IFNAMSIZ 1 +#endif +#ifndef __UAPI_DEF_IF_IFREQ #define __UAPI_DEF_IF_IFREQ 1 +#endif /* Everything up to IFF_DYNAMIC, matches net/if.h until glibc 2.23 */ +#ifndef __UAPI_DEF_IF_NET_DEVICE_FLAGS #define __UAPI_DEF_IF_NET_DEVICE_FLAGS 1 +#endif /* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */ +#ifndef __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO #define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1 +#endif /* Definitions for in.h */ +#ifndef __UAPI_DEF_IN_ADDR #define __UAPI_DEF_IN_ADDR 1 +#endif +#ifndef __UAPI_DEF_IN_IPPROTO #define __UAPI_DEF_IN_IPPROTO 1 +#endif +#ifndef __UAPI_DEF_IN_PKTINFO #define __UAPI_DEF_IN_PKTINFO 1 +#endif +#ifndef __UAPI_DEF_IP_MREQ #define __UAPI_DEF_IP_MREQ 1 +#endif +#ifndef __UAPI_DEF_SOCKADDR_IN #define __UAPI_DEF_SOCKADDR_IN 1 +#endif +#ifndef __UAPI_DEF_IN_CLASS #define __UAPI_DEF_IN_CLASS 1 +#endif /* Definitions for in6.h */ +#ifndef __UAPI_DEF_IN6_ADDR #define __UAPI_DEF_IN6_ADDR 1 +#endif +#ifndef __UAPI_DEF_IN6_ADDR_ALT #define __UAPI_DEF_IN6_ADDR_ALT 1 +#endif +#ifndef __UAPI_DEF_SOCKADDR_IN6 #define __UAPI_DEF_SOCKADDR_IN6 1 +#endif +#ifndef __UAPI_DEF_IPV6_MREQ #define __UAPI_DEF_IPV6_MREQ 1 +#endif +#ifndef __UAPI_DEF_IPPROTO_V6 #define __UAPI_DEF_IPPROTO_V6 1 +#endif +#ifndef __UAPI_DEF_IPV6_OPTIONS #define __UAPI_DEF_IPV6_OPTIONS 1 +#endif +#ifndef __UAPI_DEF_IN6_PKTINFO #define __UAPI_DEF_IN6_PKTINFO 1 +#endif +#ifndef __UAPI_DEF_IP6_MTUINFO #define __UAPI_DEF_IP6_MTUINFO 1 +#endif /* Definitions for ipx.h */ +#ifndef __UAPI_DEF_SOCKADDR_IPX #define __UAPI_DEF_SOCKADDR_IPX 1 +#endif +#ifndef __UAPI_DEF_IPX_ROUTE_DEFINITION #define __UAPI_DEF_IPX_ROUTE_DEFINITION 1 +#endif +#ifndef __UAPI_DEF_IPX_INTERFACE_DEFINITION #define __UAPI_DEF_IPX_INTERFACE_DEFINITION 1 +#endif +#ifndef __UAPI_DEF_IPX_CONFIG_DATA #define __UAPI_DEF_IPX_CONFIG_DATA 1 +#endif +#ifndef __UAPI_DEF_IPX_ROUTE_DEF #define __UAPI_DEF_IPX_ROUTE_DEF 1 +#endif /* Definitions for xattr.h */ +#ifndef __UAPI_DEF_XATTR #define __UAPI_DEF_XATTR 1 +#endif #endif /* __GLIBC__ */ +/* Definitions for if_ether.h */ +/* allow libcs like musl to deactivate this, glibc does not implement this. */ +#ifndef __UAPI_DEF_ETHHDR +#define __UAPI_DEF_ETHHDR 1 +#endif + #endif /* _UAPI_LIBC_COMPAT_H */ diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index fc8c15a24a43..9574bd40870b 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -36,7 +36,7 @@ enum ip_conntrack_info { #define NF_CT_STATE_INVALID_BIT (1 << 0) #define NF_CT_STATE_BIT(ctinfo) (1 << ((ctinfo) % IP_CT_IS_REPLY + 1)) -#define NF_CT_STATE_UNTRACKED_BIT (1 << (IP_CT_UNTRACKED + 1)) +#define NF_CT_STATE_UNTRACKED_BIT (1 << 6) /* Bitset representing status of connection. */ enum ip_conntrack_status { diff --git a/init/Kconfig b/init/Kconfig index 2934249fba46..690a381adee0 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -461,10 +461,14 @@ endmenu # "CPU/Task time and stats accounting" config CPU_ISOLATION bool "CPU isolation" + default y help Make sure that CPUs running critical tasks are not disturbed by any source of "noise" such as unbound workqueues, timers, kthreads... - Unbound jobs get offloaded to housekeeping CPUs. + Unbound jobs get offloaded to housekeeping CPUs. This is driven by + the "isolcpus=" boot parameter. + + Say Y if unsure. source "kernel/rcu/Kconfig" diff --git a/init/main.c b/init/main.c index 7b606fc48482..a8100b954839 100644 --- a/init/main.c +++ b/init/main.c @@ -75,6 +75,7 @@ #include <linux/slab.h> #include <linux/perf_event.h> #include <linux/ptrace.h> +#include <linux/pti.h> #include <linux/blkdev.h> #include <linux/elevator.h> #include <linux/sched_clock.h> @@ -506,6 +507,8 @@ static void __init mm_init(void) ioremap_huge_init(); /* Should be run before the first non-init thread is created */ init_espfix_bsp(); + /* Should be run after espfix64 is set up. */ + pti_init(); } asmlinkage __visible void __init start_kernel(void) diff --git a/kernel/acct.c b/kernel/acct.c index d15c0ee4d955..addf7732fb56 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -102,7 +102,7 @@ static int check_free_space(struct bsd_acct_struct *acct) { struct kstatfs sbuf; - if (time_is_before_jiffies(acct->needcheck)) + if (time_is_after_jiffies(acct->needcheck)) goto out; /* May block */ diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 01aaef1a77c5..5bb5e49ef4c3 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -368,7 +368,45 @@ out: putname(pname); return ret; } -EXPORT_SYMBOL_GPL(bpf_obj_get_user); + +static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type) +{ + struct bpf_prog *prog; + int ret = inode_permission(inode, MAY_READ | MAY_WRITE); + if (ret) + return ERR_PTR(ret); + + if (inode->i_op == &bpf_map_iops) + return ERR_PTR(-EINVAL); + if (inode->i_op != &bpf_prog_iops) + return ERR_PTR(-EACCES); + + prog = inode->i_private; + + ret = security_bpf_prog(prog); + if (ret < 0) + return ERR_PTR(ret); + + if (!bpf_prog_get_ok(prog, &type, false)) + return ERR_PTR(-EINVAL); + + return bpf_prog_inc(prog); +} + +struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type) +{ + struct bpf_prog *prog; + struct path path; + int ret = kern_path(name, LOOKUP_FOLLOW, &path); + if (ret) + return ERR_PTR(ret); + prog = __get_prog_inode(d_backing_inode(path.dentry), type); + if (!IS_ERR(prog)) + touch_atime(&path); + path_put(&path); + return prog; +} +EXPORT_SYMBOL(bpf_prog_get_type_path); static void bpf_evict_inode(struct inode *inode) { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ebf0fb23e237..2bac0dc8baba 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1068,7 +1068,7 @@ struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog) } EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero); -static bool bpf_prog_get_ok(struct bpf_prog *prog, +bool bpf_prog_get_ok(struct bpf_prog *prog, enum bpf_prog_type *attach_type, bool attach_drv) { /* not an attachment, just a refcount inc, always allow */ diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 024085daab1a..a2c05d2476ac 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -123,7 +123,11 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) */ do { css_task_iter_start(&from->self, 0, &it); - task = css_task_iter_next(&it); + + do { + task = css_task_iter_next(&it); + } while (task && (task->flags & PF_EXITING)); + if (task) get_task_struct(task); css_task_iter_end(&it); diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 0b1ffe147f24..2cf06c274e4c 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1397,7 +1397,7 @@ static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft, cgroup_on_dfl(cgrp) ? ss->name : ss->legacy_name, cft->name); else - strncpy(buf, cft->name, CGROUP_FILE_NAME_MAX); + strlcpy(buf, cft->name, CGROUP_FILE_NAME_MAX); return buf; } @@ -1864,9 +1864,9 @@ void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts) root->flags = opts->flags; if (opts->release_agent) - strcpy(root->release_agent_path, opts->release_agent); + strlcpy(root->release_agent_path, opts->release_agent, PATH_MAX); if (opts->name) - strcpy(root->name, opts->name); + strlcpy(root->name, opts->name, MAX_CGROUP_ROOT_NAMELEN); if (opts->cpuset_clone_children) set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags); } @@ -4125,26 +4125,24 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it) static void css_task_iter_advance(struct css_task_iter *it) { - struct list_head *l = it->task_pos; + struct list_head *next; lockdep_assert_held(&css_set_lock); - WARN_ON_ONCE(!l); - repeat: /* * Advance iterator to find next entry. cset->tasks is consumed * first and then ->mg_tasks. After ->mg_tasks, we move onto the * next cset. */ - l = l->next; + next = it->task_pos->next; - if (l == it->tasks_head) - l = it->mg_tasks_head->next; + if (next == it->tasks_head) + next = it->mg_tasks_head->next; - if (l == it->mg_tasks_head) + if (next == it->mg_tasks_head) css_task_iter_advance_css_set(it); else - it->task_pos = l; + it->task_pos = next; /* if PROCS, skip over tasks which aren't group leaders */ if ((it->flags & CSS_TASK_ITER_PROCS) && it->task_pos && diff --git a/kernel/cpu.c b/kernel/cpu.c index 41376c3ac93b..53f7dc65f9a3 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -80,19 +80,19 @@ static struct lockdep_map cpuhp_state_down_map = STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map); -static void inline cpuhp_lock_acquire(bool bringup) +static inline void cpuhp_lock_acquire(bool bringup) { lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map); } -static void inline cpuhp_lock_release(bool bringup) +static inline void cpuhp_lock_release(bool bringup) { lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map); } #else -static void inline cpuhp_lock_acquire(bool bringup) { } -static void inline cpuhp_lock_release(bool bringup) { } +static inline void cpuhp_lock_acquire(bool bringup) { } +static inline void cpuhp_lock_release(bool bringup) { } #endif @@ -1277,9 +1277,9 @@ static struct cpuhp_step cpuhp_bp_states[] = { * before blk_mq_queue_reinit_notify() from notify_dead(), * otherwise a RCU stall occurs. */ - [CPUHP_TIMERS_DEAD] = { + [CPUHP_TIMERS_PREPARE] = { .name = "timers:dead", - .startup.single = NULL, + .startup.single = timers_prepare_cpu, .teardown.single = timers_dead_cpu, }, /* Kicks the plugged cpu into life */ diff --git a/kernel/exit.c b/kernel/exit.c index df0c91d5606c..995453d9fb55 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1763,3 +1763,4 @@ __weak void abort(void) /* if that doesn't kill us, halt */ panic("Oops failed to kill thread"); } +EXPORT_SYMBOL(abort); diff --git a/kernel/irq/debug.h b/kernel/irq/debug.h index 17f05ef8f575..e4d3819a91cc 100644 --- a/kernel/irq/debug.h +++ b/kernel/irq/debug.h @@ -12,6 +12,11 @@ static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc) { + static DEFINE_RATELIMIT_STATE(ratelimit, 5 * HZ, 5); + + if (!__ratelimit(&ratelimit)) + return; + printk("irq %d, desc: %p, depth: %d, count: %d, unhandled: %d\n", irq, desc, desc->depth, desc->irq_count, desc->irqs_unhandled); printk("->handle_irq(): %p, ", desc->handle_irq); diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c index 7f608ac39653..acfaaef8672a 100644 --- a/kernel/irq/debugfs.c +++ b/kernel/irq/debugfs.c @@ -113,6 +113,7 @@ static const struct irq_bit_descr irqdata_states[] = { BIT_MASK_DESCR(IRQD_SETAFFINITY_PENDING), BIT_MASK_DESCR(IRQD_AFFINITY_MANAGED), BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN), + BIT_MASK_DESCR(IRQD_CAN_RESERVE), BIT_MASK_DESCR(IRQD_FORWARDED_TO_VCPU), diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index c26c5bb6b491..508c03dfef25 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -364,10 +364,11 @@ irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq) EXPORT_SYMBOL_GPL(irq_get_domain_generic_chip); /* - * Separate lockdep class for interrupt chip which can nest irq_desc - * lock. + * Separate lockdep classes for interrupt chip which can nest irq_desc + * lock and request mutex. */ static struct lock_class_key irq_nested_lock_class; +static struct lock_class_key irq_nested_request_class; /* * irq_map_generic_chip - Map a generic chip for an irq domain @@ -409,7 +410,8 @@ int irq_map_generic_chip(struct irq_domain *d, unsigned int virq, set_bit(idx, &gc->installed); if (dgc->gc_flags & IRQ_GC_INIT_NESTED_LOCK) - irq_set_lockdep_class(virq, &irq_nested_lock_class); + irq_set_lockdep_class(virq, &irq_nested_lock_class, + &irq_nested_request_class); if (chip->irq_calc_mask) chip->irq_calc_mask(data); @@ -479,7 +481,8 @@ void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk, continue; if (flags & IRQ_GC_INIT_NESTED_LOCK) - irq_set_lockdep_class(i, &irq_nested_lock_class); + irq_set_lockdep_class(i, &irq_nested_lock_class, + &irq_nested_request_class); if (!(flags & IRQ_GC_NO_MASK)) { struct irq_data *d = irq_get_irq_data(i); diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 07d08ca701ec..ab19371eab9b 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -440,7 +440,7 @@ static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear) #endif /* !CONFIG_GENERIC_PENDING_IRQ */ #if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY) -static inline int irq_domain_activate_irq(struct irq_data *data, bool early) +static inline int irq_domain_activate_irq(struct irq_data *data, bool reserve) { irqd_set_activated(data); return 0; diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 4f4f60015e8a..62068ad46930 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -1693,7 +1693,7 @@ static void __irq_domain_deactivate_irq(struct irq_data *irq_data) } } -static int __irq_domain_activate_irq(struct irq_data *irqd, bool early) +static int __irq_domain_activate_irq(struct irq_data *irqd, bool reserve) { int ret = 0; @@ -1702,9 +1702,9 @@ static int __irq_domain_activate_irq(struct irq_data *irqd, bool early) if (irqd->parent_data) ret = __irq_domain_activate_irq(irqd->parent_data, - early); + reserve); if (!ret && domain->ops->activate) { - ret = domain->ops->activate(domain, irqd, early); + ret = domain->ops->activate(domain, irqd, reserve); /* Rollback in case of error */ if (ret && irqd->parent_data) __irq_domain_deactivate_irq(irqd->parent_data); @@ -1716,17 +1716,18 @@ static int __irq_domain_activate_irq(struct irq_data *irqd, bool early) /** * irq_domain_activate_irq - Call domain_ops->activate recursively to activate * interrupt - * @irq_data: outermost irq_data associated with interrupt + * @irq_data: Outermost irq_data associated with interrupt + * @reserve: If set only reserve an interrupt vector instead of assigning one * * This is the second step to call domain_ops->activate to program interrupt * controllers, so the interrupt could actually get delivered. */ -int irq_domain_activate_irq(struct irq_data *irq_data, bool early) +int irq_domain_activate_irq(struct irq_data *irq_data, bool reserve) { int ret = 0; if (!irqd_is_activated(irq_data)) - ret = __irq_domain_activate_irq(irq_data, early); + ret = __irq_domain_activate_irq(irq_data, reserve); if (!ret) irqd_set_activated(irq_data); return ret; diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index edb987b2c58d..2f3c4f5382cc 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -339,6 +339,40 @@ int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev, return ret; } +/* + * Carefully check whether the device can use reservation mode. If + * reservation mode is enabled then the early activation will assign a + * dummy vector to the device. If the PCI/MSI device does not support + * masking of the entry then this can result in spurious interrupts when + * the device driver is not absolutely careful. But even then a malfunction + * of the hardware could result in a spurious interrupt on the dummy vector + * and render the device unusable. If the entry can be masked then the core + * logic will prevent the spurious interrupt and reservation mode can be + * used. For now reservation mode is restricted to PCI/MSI. + */ +static bool msi_check_reservation_mode(struct irq_domain *domain, + struct msi_domain_info *info, + struct device *dev) +{ + struct msi_desc *desc; + + if (domain->bus_token != DOMAIN_BUS_PCI_MSI) + return false; + + if (!(info->flags & MSI_FLAG_MUST_REACTIVATE)) + return false; + + if (IS_ENABLED(CONFIG_PCI_MSI) && pci_msi_ignore_mask) + return false; + + /* + * Checking the first MSI descriptor is sufficient. MSIX supports + * masking and MSI does so when the maskbit is set. + */ + desc = first_msi_entry(dev); + return desc->msi_attrib.is_msix || desc->msi_attrib.maskbit; +} + /** * msi_domain_alloc_irqs - Allocate interrupts from a MSI interrupt domain * @domain: The domain to allocate from @@ -353,9 +387,11 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, { struct msi_domain_info *info = domain->host_data; struct msi_domain_ops *ops = info->ops; - msi_alloc_info_t arg; + struct irq_data *irq_data; struct msi_desc *desc; + msi_alloc_info_t arg; int i, ret, virq; + bool can_reserve; ret = msi_domain_prepare_irqs(domain, dev, nvec, &arg); if (ret) @@ -385,6 +421,8 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, if (ops->msi_finish) ops->msi_finish(&arg, 0); + can_reserve = msi_check_reservation_mode(domain, info, dev); + for_each_msi_entry(desc, dev) { virq = desc->irq; if (desc->nvec_used == 1) @@ -397,15 +435,25 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, * the MSI entries before the PCI layer enables MSI in the * card. Otherwise the card latches a random msi message. */ - if (info->flags & MSI_FLAG_ACTIVATE_EARLY) { - struct irq_data *irq_data; + if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY)) + continue; + irq_data = irq_domain_get_irq_data(domain, desc->irq); + if (!can_reserve) + irqd_clr_can_reserve(irq_data); + ret = irq_domain_activate_irq(irq_data, can_reserve); + if (ret) + goto cleanup; + } + + /* + * If these interrupts use reservation mode, clear the activated bit + * so request_irq() will assign the final vector. + */ + if (can_reserve) { + for_each_msi_entry(desc, dev) { irq_data = irq_domain_get_irq_data(domain, desc->irq); - ret = irq_domain_activate_irq(irq_data, true); - if (ret) - goto cleanup; - if (info->flags & MSI_FLAG_MUST_REACTIVATE) - irqd_clr_activated(irq_data); + irqd_clr_activated(irq_data); } } return 0; diff --git a/kernel/pid.c b/kernel/pid.c index b13b624e2c49..1e8bb6550ec4 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -193,10 +193,8 @@ struct pid *alloc_pid(struct pid_namespace *ns) } if (unlikely(is_child_reaper(pid))) { - if (pid_ns_prepare_proc(ns)) { - disable_pid_allocation(ns); + if (pid_ns_prepare_proc(ns)) goto out_free; - } } get_pid_ns(ns); @@ -226,6 +224,10 @@ out_free: while (++i <= ns->level) idr_remove(&ns->idr, (pid->numbers + i)->nr); + /* On failure to allocate the first pid, reset the state */ + if (ns->pid_allocated == PIDNS_ADDING) + idr_set_cursor(&ns->idr, 0); + spin_unlock_irq(&pidmap_lock); kmem_cache_free(ns->pid_cachep, pid); diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 2f52ec0f1539..d6717a3331a1 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -244,7 +244,7 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util, #ifdef CONFIG_NO_HZ_COMMON static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { - unsigned long idle_calls = tick_nohz_get_idle_calls(); + unsigned long idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu); bool ret = idle_calls == sg_cpu->saved_idle_calls; sg_cpu->saved_idle_calls = idle_calls; diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index e776fc8cc1df..f6b5f19223d6 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -95,6 +95,7 @@ config NO_HZ_FULL select RCU_NOCB_CPU select VIRT_CPU_ACCOUNTING_GEN select IRQ_WORK + select CPU_ISOLATION help Adaptively try to shutdown the tick whenever possible, even when the CPU is running tasks. Typically this requires running a single diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 99578f06c8d4..f7cc7abfcf25 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -650,6 +650,11 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) ts->next_tick = 0; } +static inline bool local_timer_softirq_pending(void) +{ + return local_softirq_pending() & TIMER_SOFTIRQ; +} + static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, ktime_t now, int cpu) { @@ -666,8 +671,18 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, } while (read_seqretry(&jiffies_lock, seq)); ts->last_jiffies = basejiff; - if (rcu_needs_cpu(basemono, &next_rcu) || - arch_needs_cpu() || irq_work_needs_cpu()) { + /* + * Keep the periodic tick, when RCU, architecture or irq_work + * requests it. + * Aside of that check whether the local timer softirq is + * pending. If so its a bad idea to call get_next_timer_interrupt() + * because there is an already expired timer, so it will request + * immeditate expiry, which rearms the hardware timer with a + * minimal delta which brings us back to this place + * immediately. Lather, rinse and repeat... + */ + if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() || + irq_work_needs_cpu() || local_timer_softirq_pending()) { next_tick = basemono + TICK_NSEC; } else { /* @@ -986,6 +1001,19 @@ ktime_t tick_nohz_get_sleep_length(void) } /** + * tick_nohz_get_idle_calls_cpu - return the current idle calls counter value + * for a particular CPU. + * + * Called from the schedutil frequency scaling governor in scheduler context. + */ +unsigned long tick_nohz_get_idle_calls_cpu(int cpu) +{ + struct tick_sched *ts = tick_get_tick_sched(cpu); + + return ts->idle_calls; +} + +/** * tick_nohz_get_idle_calls - return the current idle calls counter value * * Called from the schedutil frequency scaling governor in scheduler context. diff --git a/kernel/time/timer.c b/kernel/time/timer.c index ffebcf878fba..89a9e1b4264a 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -823,11 +823,10 @@ static inline struct timer_base *get_timer_cpu_base(u32 tflags, u32 cpu) struct timer_base *base = per_cpu_ptr(&timer_bases[BASE_STD], cpu); /* - * If the timer is deferrable and nohz is active then we need to use - * the deferrable base. + * If the timer is deferrable and NO_HZ_COMMON is set then we need + * to use the deferrable base. */ - if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active && - (tflags & TIMER_DEFERRABLE)) + if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE)) base = per_cpu_ptr(&timer_bases[BASE_DEF], cpu); return base; } @@ -837,11 +836,10 @@ static inline struct timer_base *get_timer_this_cpu_base(u32 tflags) struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); /* - * If the timer is deferrable and nohz is active then we need to use - * the deferrable base. + * If the timer is deferrable and NO_HZ_COMMON is set then we need + * to use the deferrable base. */ - if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active && - (tflags & TIMER_DEFERRABLE)) + if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE)) base = this_cpu_ptr(&timer_bases[BASE_DEF]); return base; } @@ -1009,8 +1007,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int option if (!ret && (options & MOD_TIMER_PENDING_ONLY)) goto out_unlock; - debug_activate(timer, expires); - new_base = get_target_base(base, timer->flags); if (base != new_base) { @@ -1034,6 +1030,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int option } } + debug_activate(timer, expires); + timer->expires = expires; /* * If 'idx' was calculated above and the base time did not advance @@ -1684,7 +1682,7 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h) base->must_forward_clk = false; __run_timers(base); - if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) + if (IS_ENABLED(CONFIG_NO_HZ_COMMON)) __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); } @@ -1855,6 +1853,21 @@ static void migrate_timer_list(struct timer_base *new_base, struct hlist_head *h } } +int timers_prepare_cpu(unsigned int cpu) +{ + struct timer_base *base; + int b; + + for (b = 0; b < NR_BASES; b++) { + base = per_cpu_ptr(&timer_bases[b], cpu); + base->clk = jiffies; + base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA; + base->is_idle = false; + base->must_forward_clk = true; + } + return 0; +} + int timers_dead_cpu(unsigned int cpu) { struct timer_base *old_base; diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index c3e84edc47c9..2615074d3de5 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -346,7 +346,8 @@ static int kobject_uevent_net_broadcast(struct kobject *kobj, static void zap_modalias_env(struct kobj_uevent_env *env) { static const char modalias_prefix[] = "MODALIAS="; - int i; + size_t len; + int i, j; for (i = 0; i < env->envp_idx;) { if (strncmp(env->envp[i], modalias_prefix, @@ -355,11 +356,18 @@ static void zap_modalias_env(struct kobj_uevent_env *env) continue; } - if (i != env->envp_idx - 1) - memmove(&env->envp[i], &env->envp[i + 1], - sizeof(env->envp[i]) * env->envp_idx - 1); + len = strlen(env->envp[i]) + 1; + + if (i != env->envp_idx - 1) { + memmove(env->envp[i], env->envp[i + 1], + env->buflen - len); + + for (j = i; j < env->envp_idx - 1; j++) + env->envp[j] = env->envp[j + 1] - len; + } env->envp_idx--; + env->buflen -= len; } } diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h index 57fd45ab7af1..08c60d10747f 100644 --- a/lib/mpi/longlong.h +++ b/lib/mpi/longlong.h @@ -671,7 +671,23 @@ do { \ ************** MIPS/64 ************** ***************************************/ #if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64 -#if (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4) +#if defined(__mips_isa_rev) && __mips_isa_rev >= 6 +/* + * GCC ends up emitting a __multi3 intrinsic call for MIPS64r6 with the plain C + * code below, so we special case MIPS64r6 until the compiler can do better. + */ +#define umul_ppmm(w1, w0, u, v) \ +do { \ + __asm__ ("dmulu %0,%1,%2" \ + : "=d" ((UDItype)(w0)) \ + : "d" ((UDItype)(u)), \ + "d" ((UDItype)(v))); \ + __asm__ ("dmuhu %0,%1,%2" \ + : "=d" ((UDItype)(w1)) \ + : "d" ((UDItype)(u)), \ + "d" ((UDItype)(v))); \ +} while (0) +#elif (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4) #define umul_ppmm(w1, w0, u, v) \ do { \ typedef unsigned int __ll_UTItype __attribute__((mode(TI))); \ diff --git a/lib/timerqueue.c b/lib/timerqueue.c index 4a720ed4fdaf..0d54bcbc8170 100644 --- a/lib/timerqueue.c +++ b/lib/timerqueue.c @@ -33,8 +33,9 @@ * @head: head of timerqueue * @node: timer node to be added * - * Adds the timer node to the timerqueue, sorted by the - * node's expires value. + * Adds the timer node to the timerqueue, sorted by the node's expires + * value. Returns true if the newly added timer is the first expiring timer in + * the queue. */ bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node) { @@ -70,7 +71,8 @@ EXPORT_SYMBOL_GPL(timerqueue_add); * @head: head of timerqueue * @node: timer node to be removed * - * Removes the timer node from the timerqueue. + * Removes the timer node from the timerqueue. Returns true if the queue is + * not empty after the remove. */ bool timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node) { diff --git a/mm/debug.c b/mm/debug.c index d947f3e03b0d..56e2d9125ea5 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -50,7 +50,7 @@ void __dump_page(struct page *page, const char *reason) */ int mapcount = PageSlab(page) ? 0 : page_mapcount(page); - pr_emerg("page:%p count:%d mapcount:%d mapping:%p index:%#lx", + pr_emerg("page:%px count:%d mapcount:%d mapping:%px index:%#lx", page, page_ref_count(page), mapcount, page->mapping, page_to_pgoff(page)); if (PageCompound(page)) @@ -69,7 +69,7 @@ void __dump_page(struct page *page, const char *reason) #ifdef CONFIG_MEMCG if (page->mem_cgroup) - pr_alert("page->mem_cgroup:%p\n", page->mem_cgroup); + pr_alert("page->mem_cgroup:%px\n", page->mem_cgroup); #endif } @@ -84,10 +84,10 @@ EXPORT_SYMBOL(dump_page); void dump_vma(const struct vm_area_struct *vma) { - pr_emerg("vma %p start %p end %p\n" - "next %p prev %p mm %p\n" - "prot %lx anon_vma %p vm_ops %p\n" - "pgoff %lx file %p private_data %p\n" + pr_emerg("vma %px start %px end %px\n" + "next %px prev %px mm %px\n" + "prot %lx anon_vma %px vm_ops %px\n" + "pgoff %lx file %px private_data %px\n" "flags: %#lx(%pGv)\n", vma, (void *)vma->vm_start, (void *)vma->vm_end, vma->vm_next, vma->vm_prev, vma->vm_mm, @@ -100,27 +100,27 @@ EXPORT_SYMBOL(dump_vma); void dump_mm(const struct mm_struct *mm) { - pr_emerg("mm %p mmap %p seqnum %d task_size %lu\n" + pr_emerg("mm %px mmap %px seqnum %d task_size %lu\n" #ifdef CONFIG_MMU - "get_unmapped_area %p\n" + "get_unmapped_area %px\n" #endif "mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n" - "pgd %p mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n" + "pgd %px mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n" "hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n" "pinned_vm %lx data_vm %lx exec_vm %lx stack_vm %lx\n" "start_code %lx end_code %lx start_data %lx end_data %lx\n" "start_brk %lx brk %lx start_stack %lx\n" "arg_start %lx arg_end %lx env_start %lx env_end %lx\n" - "binfmt %p flags %lx core_state %p\n" + "binfmt %px flags %lx core_state %px\n" #ifdef CONFIG_AIO - "ioctx_table %p\n" + "ioctx_table %px\n" #endif #ifdef CONFIG_MEMCG - "owner %p " + "owner %px " #endif - "exe_file %p\n" + "exe_file %px\n" #ifdef CONFIG_MMU_NOTIFIER - "mmu_notifier_mm %p\n" + "mmu_notifier_mm %px\n" #endif #ifdef CONFIG_NUMA_BALANCING "numa_next_scan %lu numa_scan_offset %lu numa_scan_seq %d\n" diff --git a/mm/mprotect.c b/mm/mprotect.c index ec39f730a0bf..58b629bb70de 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -166,7 +166,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, next = pmd_addr_end(addr, end); if (!is_swap_pmd(*pmd) && !pmd_trans_huge(*pmd) && !pmd_devmap(*pmd) && pmd_none_or_clear_bad(pmd)) - continue; + goto next; /* invoke the mmu notifier if the pmd is populated */ if (!mni_start) { @@ -188,7 +188,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, } /* huge pmd was handled */ - continue; + goto next; } } /* fall through, the trans huge pmd just split */ @@ -196,6 +196,8 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, this_pages = change_pte_range(vma, pmd, addr, next, newprot, dirty_accountable, prot_numa); pages += this_pages; +next: + cond_resched(); } while (pmd++, addr = next, addr != end); if (mni_start) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7e5e775e97f4..76c9688b6a0a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6260,6 +6260,8 @@ void __paginginit zero_resv_unavail(void) pgcnt = 0; for_each_resv_unavail_range(i, &start, &end) { for (pfn = PFN_DOWN(start); pfn < PFN_UP(end); pfn++) { + if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) + continue; mm_zero_struct_page(pfn_to_page(pfn)); pgcnt++; } diff --git a/mm/sparse.c b/mm/sparse.c index 7a5dacaa06e3..2609aba121e8 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -211,7 +211,7 @@ void __init memory_present(int nid, unsigned long start, unsigned long end) if (unlikely(!mem_section)) { unsigned long size, align; - size = sizeof(struct mem_section) * NR_SECTION_ROOTS; + size = sizeof(struct mem_section*) * NR_SECTION_ROOTS; align = 1 << (INTERNODE_CACHE_SHIFT); mem_section = memblock_virt_alloc(size, align); } diff --git a/mm/vmscan.c b/mm/vmscan.c index c02c850ea349..47d5ced51f2d 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -297,10 +297,13 @@ EXPORT_SYMBOL(register_shrinker); */ void unregister_shrinker(struct shrinker *shrinker) { + if (!shrinker->nr_deferred) + return; down_write(&shrinker_rwsem); list_del(&shrinker->list); up_write(&shrinker_rwsem); kfree(shrinker->nr_deferred); + shrinker->nr_deferred = NULL; } EXPORT_SYMBOL(unregister_shrinker); diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 685049a9048d..683c0651098c 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -53,6 +53,7 @@ #include <linux/mount.h> #include <linux/migrate.h> #include <linux/pagemap.h> +#include <linux/fs.h> #define ZSPAGE_MAGIC 0x58 diff --git a/net/core/dev.c b/net/core/dev.c index d7925ef8743d..5cb782f074d7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1146,7 +1146,19 @@ EXPORT_SYMBOL(dev_alloc_name); int dev_get_valid_name(struct net *net, struct net_device *dev, const char *name) { - return dev_alloc_name_ns(net, dev, name); + BUG_ON(!net); + + if (!dev_valid_name(name)) + return -EINVAL; + + if (strchr(name, '%')) + return dev_alloc_name_ns(net, dev, name); + else if (__dev_get_by_name(net, name)) + return -EEXIST; + else if (dev->name != name) + strlcpy(dev->name, name, IFNAMSIZ); + + return 0; } EXPORT_SYMBOL(dev_get_valid_name); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 50a79203043b..fff6314f4c5e 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -771,15 +771,6 @@ static int ethtool_set_link_ksettings(struct net_device *dev, return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings); } -static void -warn_incomplete_ethtool_legacy_settings_conversion(const char *details) -{ - char name[sizeof(current->comm)]; - - pr_info_once("warning: `%s' uses legacy ethtool link settings API, %s\n", - get_task_comm(name, current), details); -} - /* Query device for its ethtool_cmd settings. * * Backward compatibility note: for compatibility with legacy ethtool, @@ -806,10 +797,8 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) &link_ksettings); if (err < 0) return err; - if (!convert_link_ksettings_to_legacy_settings(&cmd, - &link_ksettings)) - warn_incomplete_ethtool_legacy_settings_conversion( - "link modes are only partially reported"); + convert_link_ksettings_to_legacy_settings(&cmd, + &link_ksettings); /* send a sensible cmd tag back to user */ cmd.cmd = ETHTOOL_GSET; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index c688dc564b11..a0500eeda344 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1751,18 +1751,18 @@ static bool link_dump_filtered(struct net_device *dev, return false; } -static struct net *get_target_net(struct sk_buff *skb, int netnsid) +static struct net *get_target_net(struct sock *sk, int netnsid) { struct net *net; - net = get_net_ns_by_id(sock_net(skb->sk), netnsid); + net = get_net_ns_by_id(sock_net(sk), netnsid); if (!net) return ERR_PTR(-EINVAL); /* For now, the caller is required to have CAP_NET_ADMIN in * the user namespace owning the target net ns. */ - if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { + if (!sk_ns_capable(sk, net->user_ns, CAP_NET_ADMIN)) { put_net(net); return ERR_PTR(-EACCES); } @@ -1803,7 +1803,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) ifla_policy, NULL) >= 0) { if (tb[IFLA_IF_NETNSID]) { netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]); - tgt_net = get_target_net(skb, netnsid); + tgt_net = get_target_net(skb->sk, netnsid); if (IS_ERR(tgt_net)) { tgt_net = net; netnsid = -1; @@ -2985,7 +2985,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[IFLA_IF_NETNSID]) { netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]); - tgt_net = get_target_net(skb, netnsid); + tgt_net = get_target_net(NETLINK_CB(skb).sk, netnsid); if (IS_ERR(tgt_net)) return PTR_ERR(tgt_net); } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index edda5ad3b405..b5f19703fca6 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1243,23 +1243,28 @@ out: * If fib6_add_1 has cleared the old leaf pointer in the * super-tree leaf node we have to find a new one for it. */ - struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf, - lockdep_is_held(&table->tb6_lock)); - if (pn != fn && pn_leaf == rt) { - pn_leaf = NULL; - RCU_INIT_POINTER(pn->leaf, NULL); - atomic_dec(&rt->rt6i_ref); - } - if (pn != fn && !pn_leaf && !(pn->fn_flags & RTN_RTINFO)) { - pn_leaf = fib6_find_prefix(info->nl_net, table, pn); -#if RT6_DEBUG >= 2 - if (!pn_leaf) { - WARN_ON(!pn_leaf); - pn_leaf = info->nl_net->ipv6.ip6_null_entry; + if (pn != fn) { + struct rt6_info *pn_leaf = + rcu_dereference_protected(pn->leaf, + lockdep_is_held(&table->tb6_lock)); + if (pn_leaf == rt) { + pn_leaf = NULL; + RCU_INIT_POINTER(pn->leaf, NULL); + atomic_dec(&rt->rt6i_ref); } + if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) { + pn_leaf = fib6_find_prefix(info->nl_net, table, + pn); +#if RT6_DEBUG >= 2 + if (!pn_leaf) { + WARN_ON(!pn_leaf); + pn_leaf = + info->nl_net->ipv6.ip6_null_entry; + } #endif - atomic_inc(&pn_leaf->rt6i_ref); - rcu_assign_pointer(pn->leaf, pn_leaf); + atomic_inc(&pn_leaf->rt6i_ref); + rcu_assign_pointer(pn->leaf, pn_leaf); + } } #endif goto failure; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 8a4610e84e58..8071f42cd8a0 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1077,10 +1077,11 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); neigh_release(neigh); } - } else if (!(t->parms.flags & - (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) { - /* enable the cache only only if the routing decision does - * not depend on the current inner header value + } else if (t->parms.proto != 0 && !(t->parms.flags & + (IP6_TNL_F_USE_ORIG_TCLASS | + IP6_TNL_F_USE_ORIG_FWMARK))) { + /* enable the cache only if neither the outer protocol nor the + * routing decision depends on the current inner header value */ use_cache = true; } @@ -1679,11 +1680,11 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) { struct ip6_tnl *tnl = netdev_priv(dev); - if (tnl->parms.proto == IPPROTO_IPIP) { - if (new_mtu < ETH_MIN_MTU) + if (tnl->parms.proto == IPPROTO_IPV6) { + if (new_mtu < IPV6_MIN_MTU) return -EINVAL; } else { - if (new_mtu < IPV6_MIN_MTU) + if (new_mtu < ETH_MIN_MTU) return -EINVAL; } if (new_mtu > 0xFFF8 - dev->hard_header_len) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index b3cff69bfd66..fd580614085b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3625,6 +3625,8 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) } return true; case NL80211_IFTYPE_MESH_POINT: + if (ether_addr_equal(sdata->vif.addr, hdr->addr2)) + return false; if (multicast) return true; return ether_addr_equal(sdata->vif.addr, hdr->addr1); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index db0933256ec9..336b81689ac9 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2107,7 +2107,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb, continue; list_for_each_entry_rcu(chain, &table->chains, list) { - if (ctx && ctx->chain[0] && + if (ctx && ctx->chain && strcmp(ctx->chain, chain->name) != 0) continue; @@ -4700,8 +4700,10 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb) { struct nft_obj_filter *filter = cb->data; - kfree(filter->table); - kfree(filter); + if (filter) { + kfree(filter->table); + kfree(filter); + } return 0; } diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c index 1f7fbd3c7e5a..06b090d8e901 100644 --- a/net/netfilter/xt_bpf.c +++ b/net/netfilter/xt_bpf.c @@ -55,21 +55,11 @@ static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret) static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret) { - mm_segment_t oldfs = get_fs(); - int retval, fd; - if (strnlen(path, XT_BPF_PATH_MAX) == XT_BPF_PATH_MAX) return -EINVAL; - set_fs(KERNEL_DS); - fd = bpf_obj_get_user(path, 0); - set_fs(oldfs); - if (fd < 0) - return fd; - - retval = __bpf_mt_check_fd(fd, ret); - sys_close(fd); - return retval; + *ret = bpf_prog_get_type_path(path, BPF_PROG_TYPE_SOCKET_FILTER); + return PTR_ERR_OR_ZERO(*ret); } static int bpf_mt_check(const struct xt_mtchk_param *par) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index bc2f1e0977d6..634cfcb7bba6 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -525,6 +525,9 @@ int rds_rdma_extra_size(struct rds_rdma_args *args) local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr; + if (args->nr_local == 0) + return -EINVAL; + /* figure out the number of pages in the vector */ for (i = 0; i < args->nr_local; i++) { if (copy_from_user(&vec, &local_vec[i], @@ -874,6 +877,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, err: if (page) put_page(page); + rm->atomic.op_active = 0; kfree(rm->atomic.op_notifier); return ret; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 9d632e92cad0..b56986d41c87 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -159,7 +159,7 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets, if (action == TC_ACT_SHOT) this_cpu_ptr(gact->common.cpu_qstats)->drops += packets; - tm->lastuse = lastuse; + tm->lastuse = max_t(u64, tm->lastuse, lastuse); } static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 37e5e4decbd6..e6ff88f72900 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -231,7 +231,7 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets, struct tcf_t *tm = &m->tcf_tm; _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); - tm->lastuse = lastuse; + tm->lastuse = max_t(u64, tm->lastuse, lastuse); } static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, diff --git a/net/sctp/input.c b/net/sctp/input.c index 621b5ca3fd1c..141c9c466ec1 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -399,20 +399,24 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, return; } - if (t->param_flags & SPP_PMTUD_ENABLE) { - /* Update transports view of the MTU */ - sctp_transport_update_pmtu(t, pmtu); - - /* Update association pmtu. */ - sctp_assoc_sync_pmtu(asoc); - } + if (!(t->param_flags & SPP_PMTUD_ENABLE)) + /* We can't allow retransmitting in such case, as the + * retransmission would be sized just as before, and thus we + * would get another icmp, and retransmit again. + */ + return; - /* Retransmit with the new pmtu setting. - * Normally, if PMTU discovery is disabled, an ICMP Fragmentation - * Needed will never be sent, but if a message was sent before - * PMTU discovery was disabled that was larger than the PMTU, it - * would not be fragmented, so it must be re-transmitted fragmented. + /* Update transports view of the MTU. Return if no update was needed. + * If an update wasn't needed/possible, it also doesn't make sense to + * try to retransmit now. */ + if (!sctp_transport_update_pmtu(t, pmtu)) + return; + + /* Update association pmtu. */ + sctp_assoc_sync_pmtu(asoc); + + /* Retransmit with the new pmtu setting. */ sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD); } diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 06b644dd858c..cedf672487f9 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -156,9 +156,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, sctp_stream_outq_migrate(stream, NULL, outcnt); sched->sched_all(stream); - i = sctp_stream_alloc_out(stream, outcnt, gfp); - if (i) - return i; + ret = sctp_stream_alloc_out(stream, outcnt, gfp); + if (ret) + goto out; stream->outcnt = outcnt; for (i = 0; i < stream->outcnt; i++) @@ -171,19 +171,17 @@ in: if (!incnt) goto out; - i = sctp_stream_alloc_in(stream, incnt, gfp); - if (i) { - ret = -ENOMEM; - goto free; + ret = sctp_stream_alloc_in(stream, incnt, gfp); + if (ret) { + sched->free(stream); + kfree(stream->out); + stream->out = NULL; + stream->outcnt = 0; + goto out; } stream->incnt = incnt; - goto out; -free: - sched->free(stream); - kfree(stream->out); - stream->out = NULL; out: return ret; } diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 1e5a22430cf5..47f82bd794d9 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -248,28 +248,37 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } -void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) +bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) { struct dst_entry *dst = sctp_transport_dst_check(t); + bool change = true; if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { - pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n", - __func__, pmtu, SCTP_DEFAULT_MINSEGMENT); - /* Use default minimum segment size and disable - * pmtu discovery on this transport. - */ - t->pathmtu = SCTP_DEFAULT_MINSEGMENT; - } else { - t->pathmtu = pmtu; + pr_warn_ratelimited("%s: Reported pmtu %d too low, using default minimum of %d\n", + __func__, pmtu, SCTP_DEFAULT_MINSEGMENT); + /* Use default minimum segment instead */ + pmtu = SCTP_DEFAULT_MINSEGMENT; } + pmtu = SCTP_TRUNC4(pmtu); if (dst) { dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu); dst = sctp_transport_dst_check(t); } - if (!dst) + if (!dst) { t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk); + dst = t->dst; + } + + if (dst) { + /* Re-fetch, as under layers may have a higher minimum size */ + pmtu = SCTP_TRUNC4(dst_mtu(dst)); + change = t->pathmtu != pmtu; + } + t->pathmtu = pmtu; + + return change; } /* Caches the dst entry and source address for a transport's destination diff --git a/net/tipc/group.c b/net/tipc/group.c index fb7fe971e51b..3e8268d966fa 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -109,7 +109,8 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, static void tipc_group_decr_active(struct tipc_group *grp, struct tipc_member *m) { - if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING) + if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING || + m->state == MBR_REMITTED) grp->active_cnt--; } @@ -560,7 +561,7 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, int max_active = grp->max_active; int reclaim_limit = max_active * 3 / 4; int active_cnt = grp->active_cnt; - struct tipc_member *m, *rm; + struct tipc_member *m, *rm, *pm; m = tipc_group_find_member(grp, node, port); if (!m) @@ -603,6 +604,17 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, pr_warn_ratelimited("Rcv unexpected msg after REMIT\n"); tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); } + grp->active_cnt--; + list_del_init(&m->list); + if (list_empty(&grp->pending)) + return; + + /* Set oldest pending member to active and advertise */ + pm = list_first_entry(&grp->pending, struct tipc_member, list); + pm->state = MBR_ACTIVE; + list_move_tail(&pm->list, &grp->active); + grp->active_cnt++; + tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq); break; case MBR_RECLAIMING: case MBR_DISCOVERED: @@ -740,14 +752,14 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, if (!m || m->state != MBR_RECLAIMING) return; - list_del_init(&m->list); - grp->active_cnt--; remitted = msg_grp_remitted(hdr); /* Messages preceding the REMIT still in receive queue */ if (m->advertised > remitted) { m->state = MBR_REMITTED; in_flight = m->advertised - remitted; + m->advertised = ADV_IDLE + in_flight; + return; } /* All messages preceding the REMIT have been read */ if (m->advertised <= remitted) { @@ -759,6 +771,8 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); m->advertised = ADV_IDLE + in_flight; + grp->active_cnt--; + list_del_init(&m->list); /* Set oldest pending member to active and advertise */ if (list_empty(&grp->pending)) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 79a9ff682b7e..c084dd2205ac 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -11387,7 +11387,8 @@ static int nl80211_nan_add_func(struct sk_buff *skb, break; case NL80211_NAN_FUNC_FOLLOW_UP: if (!tb[NL80211_NAN_FUNC_FOLLOW_UP_ID] || - !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID]) { + !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID] || + !tb[NL80211_NAN_FUNC_FOLLOW_UP_DEST]) { err = -EINVAL; goto out; } diff --git a/security/Kconfig b/security/Kconfig index e8e449444e65..3d4debd0257e 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -54,6 +54,17 @@ config SECURITY_NETWORK implement socket and networking access controls. If you are unsure how to answer this question, answer N. +config PAGE_TABLE_ISOLATION + bool "Remove the kernel mapping in user mode" + default y + depends on X86_64 && !UML + help + This feature reduces the number of hardware side channels by + ensuring that the majority of kernel addresses are not mapped + into userspace. + + See Documentation/x86/pagetable-isolation.txt for more details. + config SECURITY_INFINIBAND bool "Infiniband Security Hooks" depends on SECURITY && INFINIBAND diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c index ed9b4d0f9f7e..8c558cbce930 100644 --- a/security/apparmor/mount.c +++ b/security/apparmor/mount.c @@ -329,6 +329,9 @@ static int match_mnt_path_str(struct aa_profile *profile, AA_BUG(!mntpath); AA_BUG(!buffer); + if (!PROFILE_MEDIATES(profile, AA_CLASS_MOUNT)) + return 0; + error = aa_path_name(mntpath, path_flags(profile, mntpath), buffer, &mntpnt, &info, profile->disconnected); if (error) @@ -380,6 +383,9 @@ static int match_mnt(struct aa_profile *profile, const struct path *path, AA_BUG(!profile); AA_BUG(devpath && !devbuffer); + if (!PROFILE_MEDIATES(profile, AA_CLASS_MOUNT)) + return 0; + if (devpath) { error = aa_path_name(devpath, path_flags(profile, devpath), devbuffer, &devname, &info, @@ -558,6 +564,9 @@ static int profile_umount(struct aa_profile *profile, struct path *path, AA_BUG(!profile); AA_BUG(!path); + if (!PROFILE_MEDIATES(profile, AA_CLASS_MOUNT)) + return 0; + error = aa_path_name(path, path_flags(profile, path), buffer, &name, &info, profile->disconnected); if (error) @@ -613,7 +622,8 @@ static struct aa_label *build_pivotroot(struct aa_profile *profile, AA_BUG(!new_path); AA_BUG(!old_path); - if (profile_unconfined(profile)) + if (profile_unconfined(profile) || + !PROFILE_MEDIATES(profile, AA_CLASS_MOUNT)) return aa_get_newest_label(&profile->label); error = aa_path_name(old_path, path_flags(profile, old_path), diff --git a/security/commoncap.c b/security/commoncap.c index 4f8e09340956..48620c93d697 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -348,21 +348,18 @@ static __u32 sansflags(__u32 m) return m & ~VFS_CAP_FLAGS_EFFECTIVE; } -static bool is_v2header(size_t size, __le32 magic) +static bool is_v2header(size_t size, const struct vfs_cap_data *cap) { - __u32 m = le32_to_cpu(magic); if (size != XATTR_CAPS_SZ_2) return false; - return sansflags(m) == VFS_CAP_REVISION_2; + return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_2; } -static bool is_v3header(size_t size, __le32 magic) +static bool is_v3header(size_t size, const struct vfs_cap_data *cap) { - __u32 m = le32_to_cpu(magic); - if (size != XATTR_CAPS_SZ_3) return false; - return sansflags(m) == VFS_CAP_REVISION_3; + return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_3; } /* @@ -405,7 +402,7 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, fs_ns = inode->i_sb->s_user_ns; cap = (struct vfs_cap_data *) tmpbuf; - if (is_v2header((size_t) ret, cap->magic_etc)) { + if (is_v2header((size_t) ret, cap)) { /* If this is sizeof(vfs_cap_data) then we're ok with the * on-disk value, so return that. */ if (alloc) @@ -413,7 +410,7 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, else kfree(tmpbuf); return ret; - } else if (!is_v3header((size_t) ret, cap->magic_etc)) { + } else if (!is_v3header((size_t) ret, cap)) { kfree(tmpbuf); return -EINVAL; } @@ -470,9 +467,9 @@ static kuid_t rootid_from_xattr(const void *value, size_t size, return make_kuid(task_ns, rootid); } -static bool validheader(size_t size, __le32 magic) +static bool validheader(size_t size, const struct vfs_cap_data *cap) { - return is_v2header(size, magic) || is_v3header(size, magic); + return is_v2header(size, cap) || is_v3header(size, cap); } /* @@ -495,7 +492,7 @@ int cap_convert_nscap(struct dentry *dentry, void **ivalue, size_t size) if (!*ivalue) return -EINVAL; - if (!validheader(size, cap->magic_etc)) + if (!validheader(size, cap)) return -EINVAL; if (!capable_wrt_inode_uidgid(inode, CAP_SETFCAP)) return -EPERM; diff --git a/tools/arch/s390/include/uapi/asm/perf_regs.h b/tools/arch/s390/include/uapi/asm/perf_regs.h new file mode 100644 index 000000000000..d17dd9e5d516 --- /dev/null +++ b/tools/arch/s390/include/uapi/asm/perf_regs.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_S390_PERF_REGS_H +#define _ASM_S390_PERF_REGS_H + +enum perf_event_s390_regs { + PERF_REG_S390_R0, + PERF_REG_S390_R1, + PERF_REG_S390_R2, + PERF_REG_S390_R3, + PERF_REG_S390_R4, + PERF_REG_S390_R5, + PERF_REG_S390_R6, + PERF_REG_S390_R7, + PERF_REG_S390_R8, + PERF_REG_S390_R9, + PERF_REG_S390_R10, + PERF_REG_S390_R11, + PERF_REG_S390_R12, + PERF_REG_S390_R13, + PERF_REG_S390_R14, + PERF_REG_S390_R15, + PERF_REG_S390_FP0, + PERF_REG_S390_FP1, + PERF_REG_S390_FP2, + PERF_REG_S390_FP3, + PERF_REG_S390_FP4, + PERF_REG_S390_FP5, + PERF_REG_S390_FP6, + PERF_REG_S390_FP7, + PERF_REG_S390_FP8, + PERF_REG_S390_FP9, + PERF_REG_S390_FP10, + PERF_REG_S390_FP11, + PERF_REG_S390_FP12, + PERF_REG_S390_FP13, + PERF_REG_S390_FP14, + PERF_REG_S390_FP15, + PERF_REG_S390_MASK, + PERF_REG_S390_PC, + + PERF_REG_S390_MAX +}; + +#endif /* _ASM_S390_PERF_REGS_H */ diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 8acfc47af70e..540a209b78ab 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -138,7 +138,7 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, *type = INSN_STACK; op->src.type = OP_SRC_ADD; op->src.reg = op_to_cfi_reg[modrm_reg][rex_r]; - op->dest.type = OP_SRC_REG; + op->dest.type = OP_DEST_REG; op->dest.reg = CFI_SP; } break; diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c index 4c6b5c9ef073..91e8e19ff5e0 100644 --- a/tools/objtool/builtin-orc.c +++ b/tools/objtool/builtin-orc.c @@ -44,6 +44,9 @@ int cmd_orc(int argc, const char **argv) const char *objname; argc--; argv++; + if (argc <= 0) + usage_with_options(orc_usage, check_options); + if (!strncmp(argv[0], "gen", 3)) { argc = parse_options(argc, argv, check_options, orc_usage, 0); if (argc != 1) @@ -52,7 +55,6 @@ int cmd_orc(int argc, const char **argv) objname = argv[0]; return check(objname, no_fp, no_unreachable, true); - } if (!strcmp(argv[0], "dump")) { diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index e5ca31429c9b..e61fe703197b 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -165,6 +165,8 @@ int create_orc_sections(struct objtool_file *file) /* create .orc_unwind_ip and .rela.orc_unwind_ip sections */ sec = elf_create_section(file->elf, ".orc_unwind_ip", sizeof(int), idx); + if (!sec) + return -1; ip_relasec = elf_create_rela_section(file->elf, sec); if (!ip_relasec) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index ed65e82f034e..0294bfb6c5f8 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -188,9 +188,7 @@ ifdef PYTHON_CONFIG PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS)) PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null) - ifeq ($(CC_NO_CLANG), 1) - PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS)) - endif + PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS)) FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) endif @@ -576,14 +574,15 @@ ifndef NO_GTK2 endif endif - ifdef NO_LIBPERL CFLAGS += -DNO_LIBPERL else PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null) PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS)) PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS)) - PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null` + PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null) + PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS)) + PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS)) FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) ifneq ($(feature-libperl), 1) diff --git a/tools/perf/arch/s390/include/perf_regs.h b/tools/perf/arch/s390/include/perf_regs.h index d2df54a6bc5a..bcfbaed78cc2 100644 --- a/tools/perf/arch/s390/include/perf_regs.h +++ b/tools/perf/arch/s390/include/perf_regs.h @@ -3,7 +3,7 @@ #include <stdlib.h> #include <linux/types.h> -#include <../../../../arch/s390/include/uapi/asm/perf_regs.h> +#include <asm/perf_regs.h> void perf_regs_load(u64 *regs); diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 6db9d809fe97..3e64f10b6d66 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -21,6 +21,7 @@ arch/x86/include/asm/cpufeatures.h arch/arm/include/uapi/asm/perf_regs.h arch/arm64/include/uapi/asm/perf_regs.h arch/powerpc/include/uapi/asm/perf_regs.h +arch/s390/include/uapi/asm/perf_regs.h arch/x86/include/uapi/asm/perf_regs.h arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm_perf.h diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c index cf36de7ea255..0c6d1002b524 100644 --- a/tools/perf/jvmti/jvmti_agent.c +++ b/tools/perf/jvmti/jvmti_agent.c @@ -384,13 +384,13 @@ jvmti_write_code(void *agent, char const *sym, } int -jvmti_write_debug_info(void *agent, uint64_t code, const char *file, - jvmti_line_info_t *li, int nr_lines) +jvmti_write_debug_info(void *agent, uint64_t code, + int nr_lines, jvmti_line_info_t *li, + const char * const * file_names) { struct jr_code_debug_info rec; - size_t sret, len, size, flen; + size_t sret, len, size, flen = 0; uint64_t addr; - const char *fn = file; FILE *fp = agent; int i; @@ -405,7 +405,9 @@ jvmti_write_debug_info(void *agent, uint64_t code, const char *file, return -1; } - flen = strlen(file) + 1; + for (i = 0; i < nr_lines; ++i) { + flen += strlen(file_names[i]) + 1; + } rec.p.id = JIT_CODE_DEBUG_INFO; size = sizeof(rec); @@ -421,7 +423,7 @@ jvmti_write_debug_info(void *agent, uint64_t code, const char *file, * file[] : source file name */ size += nr_lines * sizeof(struct debug_entry); - size += flen * nr_lines; + size += flen; rec.p.total_size = size; /* @@ -452,7 +454,7 @@ jvmti_write_debug_info(void *agent, uint64_t code, const char *file, if (sret != 1) goto error; - sret = fwrite_unlocked(fn, flen, 1, fp); + sret = fwrite_unlocked(file_names[i], strlen(file_names[i]) + 1, 1, fp); if (sret != 1) goto error; } diff --git a/tools/perf/jvmti/jvmti_agent.h b/tools/perf/jvmti/jvmti_agent.h index fe32d8344a82..6ed82f6c06dd 100644 --- a/tools/perf/jvmti/jvmti_agent.h +++ b/tools/perf/jvmti/jvmti_agent.h @@ -14,6 +14,7 @@ typedef struct { unsigned long pc; int line_number; int discrim; /* discriminator -- 0 for now */ + jmethodID methodID; } jvmti_line_info_t; void *jvmti_open(void); @@ -22,11 +23,9 @@ int jvmti_write_code(void *agent, char const *symbol_name, uint64_t vma, void const *code, const unsigned int code_size); -int jvmti_write_debug_info(void *agent, - uint64_t code, - const char *file, +int jvmti_write_debug_info(void *agent, uint64_t code, int nr_lines, jvmti_line_info_t *li, - int nr_lines); + const char * const * file_names); #if defined(__cplusplus) } diff --git a/tools/perf/jvmti/libjvmti.c b/tools/perf/jvmti/libjvmti.c index c62c9fc9a525..6add3e982614 100644 --- a/tools/perf/jvmti/libjvmti.c +++ b/tools/perf/jvmti/libjvmti.c @@ -47,6 +47,7 @@ do_get_line_numbers(jvmtiEnv *jvmti, void *pc, jmethodID m, jint bci, tab[lines].pc = (unsigned long)pc; tab[lines].line_number = loc_tab[i].line_number; tab[lines].discrim = 0; /* not yet used */ + tab[lines].methodID = m; lines++; } else { break; @@ -125,6 +126,99 @@ get_line_numbers(jvmtiEnv *jvmti, const void *compile_info, jvmti_line_info_t ** return JVMTI_ERROR_NONE; } +static void +copy_class_filename(const char * class_sign, const char * file_name, char * result, size_t max_length) +{ + /* + * Assume path name is class hierarchy, this is a common practice with Java programs + */ + if (*class_sign == 'L') { + int j, i = 0; + char *p = strrchr(class_sign, '/'); + if (p) { + /* drop the 'L' prefix and copy up to the final '/' */ + for (i = 0; i < (p - class_sign); i++) + result[i] = class_sign[i+1]; + } + /* + * append file name, we use loops and not string ops to avoid modifying + * class_sign which is used later for the symbol name + */ + for (j = 0; i < (max_length - 1) && file_name && j < strlen(file_name); j++, i++) + result[i] = file_name[j]; + + result[i] = '\0'; + } else { + /* fallback case */ + size_t file_name_len = strlen(file_name); + strncpy(result, file_name, file_name_len < max_length ? file_name_len : max_length); + } +} + +static jvmtiError +get_source_filename(jvmtiEnv *jvmti, jmethodID methodID, char ** buffer) +{ + jvmtiError ret; + jclass decl_class; + char *file_name = NULL; + char *class_sign = NULL; + char fn[PATH_MAX]; + size_t len; + + ret = (*jvmti)->GetMethodDeclaringClass(jvmti, methodID, &decl_class); + if (ret != JVMTI_ERROR_NONE) { + print_error(jvmti, "GetMethodDeclaringClass", ret); + return ret; + } + + ret = (*jvmti)->GetSourceFileName(jvmti, decl_class, &file_name); + if (ret != JVMTI_ERROR_NONE) { + print_error(jvmti, "GetSourceFileName", ret); + return ret; + } + + ret = (*jvmti)->GetClassSignature(jvmti, decl_class, &class_sign, NULL); + if (ret != JVMTI_ERROR_NONE) { + print_error(jvmti, "GetClassSignature", ret); + goto free_file_name_error; + } + + copy_class_filename(class_sign, file_name, fn, PATH_MAX); + len = strlen(fn); + *buffer = malloc((len + 1) * sizeof(char)); + if (!*buffer) { + print_error(jvmti, "GetClassSignature", ret); + ret = JVMTI_ERROR_OUT_OF_MEMORY; + goto free_class_sign_error; + } + strcpy(*buffer, fn); + ret = JVMTI_ERROR_NONE; + +free_class_sign_error: + (*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign); +free_file_name_error: + (*jvmti)->Deallocate(jvmti, (unsigned char *)file_name); + + return ret; +} + +static jvmtiError +fill_source_filenames(jvmtiEnv *jvmti, int nr_lines, + const jvmti_line_info_t * line_tab, + char ** file_names) +{ + int index; + jvmtiError ret; + + for (index = 0; index < nr_lines; ++index) { + ret = get_source_filename(jvmti, line_tab[index].methodID, &(file_names[index])); + if (ret != JVMTI_ERROR_NONE) + return ret; + } + + return JVMTI_ERROR_NONE; +} + static void JNICALL compiled_method_load_cb(jvmtiEnv *jvmti, jmethodID method, @@ -135,16 +229,18 @@ compiled_method_load_cb(jvmtiEnv *jvmti, const void *compile_info) { jvmti_line_info_t *line_tab = NULL; + char ** line_file_names = NULL; jclass decl_class; char *class_sign = NULL; char *func_name = NULL; char *func_sign = NULL; - char *file_name= NULL; + char *file_name = NULL; char fn[PATH_MAX]; uint64_t addr = (uint64_t)(uintptr_t)code_addr; jvmtiError ret; int nr_lines = 0; /* in line_tab[] */ size_t len; + int output_debug_info = 0; ret = (*jvmti)->GetMethodDeclaringClass(jvmti, method, &decl_class); @@ -158,6 +254,19 @@ compiled_method_load_cb(jvmtiEnv *jvmti, if (ret != JVMTI_ERROR_NONE) { warnx("jvmti: cannot get line table for method"); nr_lines = 0; + } else if (nr_lines > 0) { + line_file_names = malloc(sizeof(char*) * nr_lines); + if (!line_file_names) { + warnx("jvmti: cannot allocate space for line table method names"); + } else { + memset(line_file_names, 0, sizeof(char*) * nr_lines); + ret = fill_source_filenames(jvmti, nr_lines, line_tab, line_file_names); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: fill_source_filenames failed"); + } else { + output_debug_info = 1; + } + } } } @@ -181,33 +290,14 @@ compiled_method_load_cb(jvmtiEnv *jvmti, goto error; } - /* - * Assume path name is class hierarchy, this is a common practice with Java programs - */ - if (*class_sign == 'L') { - int j, i = 0; - char *p = strrchr(class_sign, '/'); - if (p) { - /* drop the 'L' prefix and copy up to the final '/' */ - for (i = 0; i < (p - class_sign); i++) - fn[i] = class_sign[i+1]; - } - /* - * append file name, we use loops and not string ops to avoid modifying - * class_sign which is used later for the symbol name - */ - for (j = 0; i < (PATH_MAX - 1) && file_name && j < strlen(file_name); j++, i++) - fn[i] = file_name[j]; - fn[i] = '\0'; - } else { - /* fallback case */ - strcpy(fn, file_name); - } + copy_class_filename(class_sign, file_name, fn, PATH_MAX); + /* * write source line info record if we have it */ - if (jvmti_write_debug_info(jvmti_agent, addr, fn, line_tab, nr_lines)) - warnx("jvmti: write_debug_info() failed"); + if (output_debug_info) + if (jvmti_write_debug_info(jvmti_agent, addr, nr_lines, line_tab, (const char * const *) line_file_names)) + warnx("jvmti: write_debug_info() failed"); len = strlen(func_name) + strlen(class_sign) + strlen(func_sign) + 2; { @@ -223,6 +313,13 @@ error: (*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign); (*jvmti)->Deallocate(jvmti, (unsigned char *)file_name); free(line_tab); + while (line_file_names && (nr_lines > 0)) { + if (line_file_names[nr_lines - 1]) { + free(line_file_names[nr_lines - 1]); + } + nr_lines -= 1; + } + free(line_file_names); } static void JNICALL diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c index 0304ffb714f2..1aef72df20a1 100644 --- a/tools/testing/selftests/x86/ldt_gdt.c +++ b/tools/testing/selftests/x86/ldt_gdt.c @@ -122,8 +122,7 @@ static void check_valid_segment(uint16_t index, int ldt, * NB: Different Linux versions do different things with the * accessed bit in set_thread_area(). */ - if (ar != expected_ar && - (ldt || ar != (expected_ar | AR_ACCESSED))) { + if (ar != expected_ar && ar != (expected_ar | AR_ACCESSED)) { printf("[FAIL]\t%s entry %hu has AR 0x%08X but expected 0x%08X\n", (ldt ? "LDT" : "GDT"), index, ar, expected_ar); nerrs++; diff --git a/tools/usb/usbip/src/utils.c b/tools/usb/usbip/src/utils.c index 2b3d6d235015..3d7b42e77299 100644 --- a/tools/usb/usbip/src/utils.c +++ b/tools/usb/usbip/src/utils.c @@ -30,6 +30,7 @@ int modify_match_busid(char *busid, int add) char command[SYSFS_BUS_ID_SIZE + 4]; char match_busid_attr_path[SYSFS_PATH_MAX]; int rc; + int cmd_size; snprintf(match_busid_attr_path, sizeof(match_busid_attr_path), "%s/%s/%s/%s/%s/%s", SYSFS_MNT_PATH, SYSFS_BUS_NAME, @@ -37,12 +38,14 @@ int modify_match_busid(char *busid, int add) attr_name); if (add) - snprintf(command, SYSFS_BUS_ID_SIZE + 4, "add %s", busid); + cmd_size = snprintf(command, SYSFS_BUS_ID_SIZE + 4, "add %s", + busid); else - snprintf(command, SYSFS_BUS_ID_SIZE + 4, "del %s", busid); + cmd_size = snprintf(command, SYSFS_BUS_ID_SIZE + 4, "del %s", + busid); rc = write_sysfs_attribute(match_busid_attr_path, command, - sizeof(command)); + cmd_size); if (rc < 0) { dbg("failed to write match_busid: %s", strerror(errno)); return -1; |