diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-28 19:59:38 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-28 19:59:38 +0200 |
commit | 77b1a7f7a05c673c187894b4ae898a8c0cdc776c (patch) | |
tree | e5f86addd81715393efcc7e3ba07cd68de31adf4 | |
parent | Merge tag 'mm-stable-2023-06-24-19-15' of git://git.kernel.org/pub/scm/linux/... (diff) | |
parent | kernel/time/posix-stubs.c: remove duplicated include (diff) | |
download | linux-77b1a7f7a05c673c187894b4ae898a8c0cdc776c.tar.xz linux-77b1a7f7a05c673c187894b4ae898a8c0cdc776c.zip |
Merge tag 'mm-nonmm-stable-2023-06-24-19-23' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull non-mm updates from Andrew Morton:
- Arnd Bergmann has fixed a bunch of -Wmissing-prototypes in top-level
directories
- Douglas Anderson has added a new "buddy" mode to the hardlockup
detector. It permits the detector to work on architectures which
cannot provide the required interrupts, by having CPUs periodically
perform checks on other CPUs
- Zhen Lei has enhanced kexec's ability to support two crash regions
- Petr Mladek has done a lot of cleanup on the hard lockup detector's
Kconfig entries
- And the usual bunch of singleton patches in various places
* tag 'mm-nonmm-stable-2023-06-24-19-23' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (72 commits)
kernel/time/posix-stubs.c: remove duplicated include
ocfs2: remove redundant assignment to variable bit_off
watchdog/hardlockup: fix typo in config HARDLOCKUP_DETECTOR_PREFER_BUDDY
powerpc: move arch_trigger_cpumask_backtrace from nmi.h to irq.h
devres: show which resource was invalid in __devm_ioremap_resource()
watchdog/hardlockup: define HARDLOCKUP_DETECTOR_ARCH
watchdog/sparc64: define HARDLOCKUP_DETECTOR_SPARC64
watchdog/hardlockup: make HAVE_NMI_WATCHDOG sparc64-specific
watchdog/hardlockup: declare arch_touch_nmi_watchdog() only in linux/nmi.h
watchdog/hardlockup: make the config checks more straightforward
watchdog/hardlockup: sort hardlockup detector related config values a logical way
watchdog/hardlockup: move SMP barriers from common code to buddy code
watchdog/buddy: simplify the dependency for HARDLOCKUP_DETECTOR_PREFER_BUDDY
watchdog/buddy: don't copy the cpumask in watchdog_next_cpu()
watchdog/buddy: cleanup how watchdog_buddy_check_hardlockup() is called
watchdog/hardlockup: remove softlockup comment in touch_nmi_watchdog()
watchdog/hardlockup: in watchdog_hardlockup_check() use cpumask_copy()
watchdog/hardlockup: don't use raw_cpu_ptr() in watchdog_hardlockup_kick()
watchdog/hardlockup: HAVE_NMI_WATCHDOG must implement watchdog_hardlockup_probe()
watchdog/hardlockup: keep kernel.nmi_watchdog sysctl as 0444 if probe fails
...
82 files changed, 957 insertions, 422 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index d6a68656950e..aff2746c8af2 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -426,20 +426,14 @@ config HAVE_HARDLOCKUP_DETECTOR_PERF The arch chooses to use the generic perf-NMI-based hardlockup detector. Must define HAVE_PERF_EVENTS_NMI. -config HAVE_NMI_WATCHDOG - depends on HAVE_NMI - bool - help - The arch provides a low level NMI watchdog. It provides - asm/nmi.h, and defines its own arch_touch_nmi_watchdog(). - config HAVE_HARDLOCKUP_DETECTOR_ARCH bool - select HAVE_NMI_WATCHDOG help - The arch chooses to provide its own hardlockup detector, which is - a superset of the HAVE_NMI_WATCHDOG. It also conforms to config - interfaces and parameters provided by hardlockup detector subsystem. + The arch provides its own hardlockup detector implementation instead + of the generic ones. + + It uses the same command line parameters, and sysctl interface, + as the generic hardlockup detectors. config HAVE_PERF_REGS bool diff --git a/arch/arm/include/asm/irq.h b/arch/arm/include/asm/irq.h index a7c2337b0c7d..18605f1b3580 100644 --- a/arch/arm/include/asm/irq.h +++ b/arch/arm/include/asm/irq.h @@ -27,7 +27,6 @@ struct irqaction; struct pt_regs; void handle_IRQ(unsigned int, struct pt_regs *); -void init_IRQ(void); #ifdef CONFIG_SMP #include <linux/cpumask.h> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index d91829189dd4..3ae5c03ded0e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -204,6 +204,8 @@ config ARM64 select HAVE_FUNCTION_ERROR_INJECTION select HAVE_FUNCTION_GRAPH_TRACER select HAVE_GCC_PLUGINS + select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && \ + HW_PERF_EVENTS && HAVE_PERF_EVENTS_NMI select HAVE_HW_BREAKPOINT if PERF_EVENTS select HAVE_IOREMAP_PROT select HAVE_IRQ_TIME_ACCOUNTING @@ -211,6 +213,7 @@ config ARM64 select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI select HAVE_PERF_EVENTS + select HAVE_PERF_EVENTS_NMI if ARM64_PSEUDO_NMI select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_PREEMPT_DYNAMIC_KEY diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 848739c15de8..553d1bc559c6 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -55,10 +55,6 @@ struct thread_info { void arch_setup_new_exec(void); #define arch_setup_new_exec arch_setup_new_exec -void arch_release_task_struct(struct task_struct *tsk); -int arch_dup_task_struct(struct task_struct *dst, - struct task_struct *src); - #endif #define TIF_SIGPENDING 0 /* signal pending */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 3864a64e2b2b..d95b3d6b471a 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -44,6 +44,7 @@ obj-$(CONFIG_KUSER_HELPERS) += kuser32.o obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o obj-$(CONFIG_MODULES) += module.o module-plts.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o +obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_CPU_PM) += sleep.o suspend.o obj-$(CONFIG_CPU_IDLE) += cpuidle.o diff --git a/arch/arm64/kernel/watchdog_hld.c b/arch/arm64/kernel/watchdog_hld.c new file mode 100644 index 000000000000..dcd25322127c --- /dev/null +++ b/arch/arm64/kernel/watchdog_hld.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/nmi.h> +#include <linux/cpufreq.h> +#include <linux/perf/arm_pmu.h> + +/* + * Safe maximum CPU frequency in case a particular platform doesn't implement + * cpufreq driver. Although, architecture doesn't put any restrictions on + * maximum frequency but 5 GHz seems to be safe maximum given the available + * Arm CPUs in the market which are clocked much less than 5 GHz. On the other + * hand, we can't make it much higher as it would lead to a large hard-lockup + * detection timeout on parts which are running slower (eg. 1GHz on + * Developerbox) and doesn't possess a cpufreq driver. + */ +#define SAFE_MAX_CPU_FREQ 5000000000UL // 5 GHz +u64 hw_nmi_get_sample_period(int watchdog_thresh) +{ + unsigned int cpu = smp_processor_id(); + unsigned long max_cpu_freq; + + max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL; + if (!max_cpu_freq) + max_cpu_freq = SAFE_MAX_CPU_FREQ; + + return (u64)max_cpu_freq * watchdog_thresh; +} + +bool __init arch_perf_nmi_is_available(void) +{ + /* + * hardlockup_detector_perf_init() will success even if Pseudo-NMI turns off, + * however, the pmu interrupts will act like a normal interrupt instead of + * NMI and the hardlockup detector would be broken. + */ + return arm_pmu_irq_is_nmi(); +} diff --git a/arch/microblaze/include/asm/setup.h b/arch/microblaze/include/asm/setup.h index a06cc1f97aa9..3657f5e78a3d 100644 --- a/arch/microblaze/include/asm/setup.h +++ b/arch/microblaze/include/asm/setup.h @@ -16,8 +16,6 @@ extern char *klimit; extern void mmu_reset(void); -void time_init(void); -void init_IRQ(void); void machine_early_init(const char *cmdline, unsigned int ram, unsigned int fdt, unsigned int msr, unsigned int tlb0, unsigned int tlb1); diff --git a/arch/mips/include/asm/fw/cfe/cfe_api.h b/arch/mips/include/asm/fw/cfe/cfe_api.h index 25df2f4deb31..b52a6a9c26f1 100644 --- a/arch/mips/include/asm/fw/cfe/cfe_api.h +++ b/arch/mips/include/asm/fw/cfe/cfe_api.h @@ -17,9 +17,6 @@ #include <linux/types.h> #include <linux/string.h> -typedef long intptr_t; - - /* * Constants */ diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h index 44f9824c1d8c..75abfa834ab7 100644 --- a/arch/mips/include/asm/irq.h +++ b/arch/mips/include/asm/irq.h @@ -19,7 +19,6 @@ #define IRQ_STACK_SIZE THREAD_SIZE #define IRQ_STACK_START (IRQ_STACK_SIZE - 16) -extern void __init init_IRQ(void); extern void *irq_stack[NR_CPUS]; /* diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 6b6eaa485946..d0eb1bd19a13 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -271,7 +271,6 @@ void arch_send_call_function_single_ipi(int cpu) static void smp_cpu_init(int cpunum) { - extern void init_IRQ(void); /* arch/parisc/kernel/irq.c */ extern void start_cpu_itimer(void); /* arch/parisc/kernel/time.c */ /* Set modes and Enable floating point coprocessor */ diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index bff5820b7cda..8eb6c5e9e4f8 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -90,8 +90,7 @@ config NMI_IPI config PPC_WATCHDOG bool - depends on HARDLOCKUP_DETECTOR - depends on HAVE_HARDLOCKUP_DETECTOR_ARCH + depends on HARDLOCKUP_DETECTOR_ARCH default y help This is a placeholder when the powerpc hardlockup detector @@ -240,7 +239,7 @@ config PPC select HAVE_GCC_PLUGINS if GCC_VERSION >= 50200 # plugin support on gcc <= 5.1 is buggy on PPC select HAVE_GENERIC_VDSO select HAVE_HARDLOCKUP_DETECTOR_ARCH if PPC_BOOK3S_64 && SMP - select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH + select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) select HAVE_IOREMAP_PROT select HAVE_IRQ_TIME_ACCOUNTING diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index deadd2149426..f257cacb49a9 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -50,9 +50,14 @@ extern void *hardirq_ctx[NR_CPUS]; extern void *softirq_ctx[NR_CPUS]; void __do_IRQ(struct pt_regs *regs); -extern void __init init_IRQ(void); int irq_choose_cpu(const struct cpumask *mask); +#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI) +extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask, + bool exclude_self); +#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace +#endif + #endif /* _ASM_IRQ_H */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h index c3c7adef74de..49a75340c3e0 100644 --- a/arch/powerpc/include/asm/nmi.h +++ b/arch/powerpc/include/asm/nmi.h @@ -3,18 +3,10 @@ #define _ASM_NMI_H #ifdef CONFIG_PPC_WATCHDOG -extern void arch_touch_nmi_watchdog(void); long soft_nmi_interrupt(struct pt_regs *regs); -void watchdog_nmi_set_timeout_pct(u64 pct); +void watchdog_hardlockup_set_timeout_pct(u64 pct); #else -static inline void arch_touch_nmi_watchdog(void) {} -static inline void watchdog_nmi_set_timeout_pct(u64 pct) {} -#endif - -#ifdef CONFIG_NMI_IPI -extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask, - bool exclude_self); -#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace +static inline void watchdog_hardlockup_set_timeout_pct(u64 pct) {} #endif extern void hv_nmi_check_nonrecoverable(struct pt_regs *regs); diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index dbcc4a793f0b..edb2dd1f53eb 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -438,7 +438,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) { int cpu = smp_processor_id(); - if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + if (!(watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED)) return HRTIMER_NORESTART; if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) @@ -479,7 +479,7 @@ static void start_watchdog(void *arg) return; } - if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + if (!(watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED)) return; if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) @@ -546,7 +546,7 @@ static void watchdog_calc_timeouts(void) wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5; } -void watchdog_nmi_stop(void) +void watchdog_hardlockup_stop(void) { int cpu; @@ -554,7 +554,7 @@ void watchdog_nmi_stop(void) stop_watchdog_on_cpu(cpu); } -void watchdog_nmi_start(void) +void watchdog_hardlockup_start(void) { int cpu; @@ -566,7 +566,7 @@ void watchdog_nmi_start(void) /* * Invoked from core watchdog init. */ -int __init watchdog_nmi_probe(void) +int __init watchdog_hardlockup_probe(void) { int err; @@ -582,7 +582,7 @@ int __init watchdog_nmi_probe(void) } #ifdef CONFIG_PPC_PSERIES -void watchdog_nmi_set_timeout_pct(u64 pct) +void watchdog_hardlockup_set_timeout_pct(u64 pct) { pr_info("Set the NMI watchdog timeout factor to %llu%%\n", pct); WRITE_ONCE(wd_timeout_pct, pct); diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 6f30113b5468..cd632ba9ebff 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -750,7 +750,7 @@ static int pseries_migrate_partition(u64 handle) goto out; if (factor) - watchdog_nmi_set_timeout_pct(factor); + watchdog_hardlockup_set_timeout_pct(factor); ret = pseries_suspend(handle); if (ret == 0) { @@ -766,7 +766,7 @@ static int pseries_migrate_partition(u64 handle) pseries_cancel_migration(handle, ret); if (factor) - watchdog_nmi_set_timeout_pct(0); + watchdog_hardlockup_set_timeout_pct(0); out: vas_migration_handler(VAS_RESUME); diff --git a/arch/riscv/include/asm/irq.h b/arch/riscv/include/asm/irq.h index 43b9ebfbd943..8e10a94430a2 100644 --- a/arch/riscv/include/asm/irq.h +++ b/arch/riscv/include/asm/irq.h @@ -16,6 +16,4 @@ void riscv_set_intc_hwnode_fn(struct fwnode_handle *(*fn)(void)); struct fwnode_handle *riscv_get_intc_hwnode(void); -extern void __init init_IRQ(void); - #endif /* _ASM_RISCV_IRQ_H */ diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h index d6a7428f6248..a06697846e69 100644 --- a/arch/riscv/include/asm/timex.h +++ b/arch/riscv/include/asm/timex.h @@ -88,6 +88,4 @@ static inline int read_current_timer(unsigned long *timer_val) return 0; } -extern void time_init(void); - #endif /* _ASM_RISCV_TIMEX_H */ diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index c7c97921ed8d..a674c7d25da5 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -52,9 +52,6 @@ struct thread_info { struct task_struct; -void arch_release_task_struct(struct task_struct *tsk); -int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); - void arch_setup_new_exec(void); #define arch_setup_new_exec arch_setup_new_exec diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 34674e38826b..9f41853f36b9 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -34,14 +34,12 @@ void kernel_stack_overflow(struct pt_regs * regs); void handle_signal32(struct ksignal *ksig, sigset_t *oldset, struct pt_regs *regs); -void __init init_IRQ(void); void do_io_irq(struct pt_regs *regs); void do_ext_irq(struct pt_regs *regs); void do_restart(void *arg); void __init startup_init(void); void die(struct pt_regs *regs, const char *str); int setup_profiling_timer(unsigned int multiplier); -void __init time_init(void); unsigned long prepare_ftrace_return(unsigned long parent, unsigned long sp, unsigned long ip); struct s390_mmap_arg_struct; diff --git a/arch/sh/include/asm/irq.h b/arch/sh/include/asm/irq.h index 1c4923502fd4..0f384b1f45ca 100644 --- a/arch/sh/include/asm/irq.h +++ b/arch/sh/include/asm/irq.h @@ -22,7 +22,6 @@ extern unsigned short *irq_mask_register; /* * PINT IRQs */ -void init_IRQ_pint(void); void make_imask_irq(unsigned int irq); static inline int generic_irq_demux(int irq) diff --git a/arch/sh/include/asm/rtc.h b/arch/sh/include/asm/rtc.h index 69dbae2949b0..7fe7002d1d50 100644 --- a/arch/sh/include/asm/rtc.h +++ b/arch/sh/include/asm/rtc.h @@ -2,8 +2,6 @@ #ifndef _ASM_RTC_H #define _ASM_RTC_H -void time_init(void); - #define RTC_CAP_4_DIGIT_YEAR (1 << 0) struct sh_rtc_platform_info { diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h index 1400fbb8b423..9f19a682d315 100644 --- a/arch/sh/include/asm/thread_info.h +++ b/arch/sh/include/asm/thread_info.h @@ -84,9 +84,6 @@ static inline struct thread_info *current_thread_info(void) #define THREAD_SIZE_ORDER (THREAD_SHIFT - PAGE_SHIFT) -extern void arch_task_cache_init(void); -extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); -extern void arch_release_task_struct(struct task_struct *tsk); extern void init_thread_xstate(void); #endif /* __ASSEMBLY__ */ diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 36fd488ccbfa..6197b87f2b3b 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -33,7 +33,7 @@ config SPARC select ARCH_WANT_IPC_PARSE_VERSION select GENERIC_PCI_IOMAP select HAS_IOPORT - select HAVE_NMI_WATCHDOG if SPARC64 + select HAVE_HARDLOCKUP_DETECTOR_SPARC64 if SPARC64 select HAVE_CBPF_JIT if SPARC32 select HAVE_EBPF_JIT if SPARC64 select HAVE_DEBUG_BUGVERBOSE diff --git a/arch/sparc/Kconfig.debug b/arch/sparc/Kconfig.debug index 6b2bec1888b3..37e003665de6 100644 --- a/arch/sparc/Kconfig.debug +++ b/arch/sparc/Kconfig.debug @@ -14,3 +14,17 @@ config FRAME_POINTER bool depends on MCOUNT default y + +config HAVE_HARDLOCKUP_DETECTOR_SPARC64 + bool + depends on HAVE_NMI + select HARDLOCKUP_DETECTOR_SPARC64 + help + Sparc64 hardlockup detector is the last one developed before adding + the common infrastructure for handling hardlockup detectors. It is + always built. It does _not_ use the common command line parameters + and sysctl interface, except for /proc/sys/kernel/nmi_watchdog. + +config HARDLOCKUP_DETECTOR_SPARC64 + bool + depends on HAVE_HARDLOCKUP_DETECTOR_SPARC64 diff --git a/arch/sparc/include/asm/irq_32.h b/arch/sparc/include/asm/irq_32.h index 43ec2609b811..6ee48321cbc2 100644 --- a/arch/sparc/include/asm/irq_32.h +++ b/arch/sparc/include/asm/irq_32.h @@ -17,7 +17,6 @@ #define irq_canonicalize(irq) (irq) -void __init init_IRQ(void); void __init sun4d_init_sbi_irq(void); #define NO_IRQ 0xffffffff diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h index 154df2cf19f4..b436029f1ced 100644 --- a/arch/sparc/include/asm/irq_64.h +++ b/arch/sparc/include/asm/irq_64.h @@ -61,7 +61,6 @@ void sun4u_destroy_msi(unsigned int irq); unsigned int irq_alloc(unsigned int dev_handle, unsigned int dev_ino); void irq_free(unsigned int irq); -void __init init_IRQ(void); void fixup_irqs(void); static inline void set_softint(unsigned long bits) diff --git a/arch/sparc/include/asm/nmi.h b/arch/sparc/include/asm/nmi.h index 90ee7863d9fe..920dc23f443f 100644 --- a/arch/sparc/include/asm/nmi.h +++ b/arch/sparc/include/asm/nmi.h @@ -8,7 +8,6 @@ void nmi_adjust_hz(unsigned int new_hz); extern atomic_t nmi_active; -void arch_touch_nmi_watchdog(void); void start_nmi_watchdog(void *unused); void stop_nmi_watchdog(void *unused); diff --git a/arch/sparc/include/asm/timer_64.h b/arch/sparc/include/asm/timer_64.h index dcfad4613e18..ffff52c8b760 100644 --- a/arch/sparc/include/asm/timer_64.h +++ b/arch/sparc/include/asm/timer_64.h @@ -34,7 +34,6 @@ extern struct sparc64_tick_ops *tick_ops; unsigned long sparc64_get_clock_tick(unsigned int cpu); void setup_sparc64_timer(void); -void __init time_init(void); #define TICK_PRIV_BIT BIT(63) #define TICKCMP_IRQ_BIT BIT(63) diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h index 9cd09a3ef35f..15da3c0597a5 100644 --- a/arch/sparc/kernel/kernel.h +++ b/arch/sparc/kernel/kernel.h @@ -91,7 +91,6 @@ extern int static_irq_count; extern spinlock_t irq_action_lock; void unexpected_irq(int irq, void *dev_id, struct pt_regs * regs); -void init_IRQ(void); /* sun4m_irq.c */ void sun4m_init_IRQ(void); diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index 060fff95a305..17cdfdbf1f3b 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -65,6 +65,11 @@ void arch_touch_nmi_watchdog(void) } EXPORT_SYMBOL(arch_touch_nmi_watchdog); +int __init watchdog_hardlockup_probe(void) +{ + return 0; +} + static void die_nmi(const char *str, struct pt_regs *regs, int do_panic) { int this_cpu = smp_processor_id(); @@ -282,11 +287,11 @@ __setup("nmi_watchdog=", setup_nmi_watchdog); * sparc specific NMI watchdog enable function. * Enables watchdog if it is not enabled already. */ -int watchdog_nmi_enable(unsigned int cpu) +void watchdog_hardlockup_enable(unsigned int cpu) { if (atomic_read(&nmi_active) == -1) { pr_warn("NMI watchdog cannot be enabled or disabled\n"); - return -1; + return; } /* @@ -295,17 +300,15 @@ int watchdog_nmi_enable(unsigned int cpu) * process first. */ if (!nmi_init_done) - return 0; + return; smp_call_function_single(cpu, start_nmi_watchdog, NULL, 1); - - return 0; } /* * sparc specific NMI watchdog disable function. * Disables watchdog if it is not disabled already. */ -void watchdog_nmi_disable(unsigned int cpu) +void watchdog_hardlockup_disable(unsigned int cpu) { if (atomic_read(&nmi_active) == -1) pr_warn_once("NMI watchdog cannot be enabled or disabled\n"); diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 768aa234cbb4..29e083b92813 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -40,8 +40,6 @@ extern void __handle_irq(struct irq_desc *desc, struct pt_regs *regs); extern void init_ISA_irqs(void); -extern void __init init_IRQ(void); - #ifdef CONFIG_X86_LOCAL_APIC void arch_trigger_cpumask_backtrace(const struct cpumask *mask, bool exclude_self); diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index f1cccba52eb9..d63b02940747 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -232,9 +232,6 @@ static inline int arch_within_stack_frames(const void * const stack, current_thread_info()->status & TS_COMPAT) #endif -extern void arch_task_cache_init(void); -extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); -extern void arch_release_task_struct(struct task_struct *tsk); extern void arch_setup_new_exec(void); #define arch_setup_new_exec arch_setup_new_exec #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/time.h b/arch/x86/include/asm/time.h index a53961c64a56..f360104ed172 100644 --- a/arch/x86/include/asm/time.h +++ b/arch/x86/include/asm/time.h @@ -6,7 +6,6 @@ #include <asm/mc146818rtc.h> extern void hpet_time_init(void); -extern void time_init(void); extern bool pit_timer_init(void); extern bool tsc_clocksource_watchdog_disabled(void); diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index dc1b03be43eb..594fce0ca744 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -32,7 +32,6 @@ extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns); extern void tsc_early_init(void); extern void tsc_init(void); -extern unsigned long calibrate_delay_is_known(void); extern void mark_tsc_unstable(char *reason); extern int unsynchronized_tsc(void); extern int check_tsc_unstable(void); diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 277e29fbd504..f6ccb2cd4dfc 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -689,6 +689,11 @@ static int armpmu_get_cpu_irq(struct arm_pmu *pmu, int cpu) return per_cpu(hw_events->irq, cpu); } +bool arm_pmu_irq_is_nmi(void) +{ + return has_nmi; +} + /* * PMU hardware loses all context when a CPU goes offline. * When a CPU is hotplugged back in, since some hardware registers are diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index 93b7edb5f1e7..08b3a1bf0ef6 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -22,6 +22,7 @@ #include <linux/platform_device.h> #include <linux/sched_clock.h> #include <linux/smp.h> +#include <linux/nmi.h> #include <asm/arm_pmuv3.h> @@ -1363,10 +1364,17 @@ static struct platform_driver armv8_pmu_driver = { static int __init armv8_pmu_driver_init(void) { + int ret; + if (acpi_disabled) - return platform_driver_register(&armv8_pmu_driver); + ret = platform_driver_register(&armv8_pmu_driver); else - return arm_pmu_acpi_probe(armv8_pmuv3_pmu_init); + ret = arm_pmu_acpi_probe(armv8_pmuv3_pmu_init); + + if (!ret) + lockup_detector_retry_init(); + + return ret; } device_initcall(armv8_pmu_driver_init) diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index c4426d12a2ad..c803c10dd97e 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -973,7 +973,7 @@ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb, la_start_blk = ocfs2_clusters_to_blocks(osb->sb, le32_to_cpu(la->la_bm_off)); bitmap = la->la_bitmap; - start = count = bit_off = 0; + start = count = 0; left = le32_to_cpu(alloc->id1.bitmap1.i_total); while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start)) diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h index b8c3d1702076..ac4fd1d5b128 100644 --- a/fs/ocfs2/ocfs2_trace.h +++ b/fs/ocfs2/ocfs2_trace.h @@ -1315,8 +1315,6 @@ DEFINE_OCFS2_FILE_OPS(ocfs2_sync_file); DEFINE_OCFS2_FILE_OPS(ocfs2_file_write_iter); -DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_write); - DEFINE_OCFS2_FILE_OPS(ocfs2_file_read_iter); DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_read); diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 5022b3e9bfcd..dfaae1e52412 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -811,7 +811,7 @@ static int ocfs2_local_free_info(struct super_block *sb, int type) struct ocfs2_quota_chunk *chunk; struct ocfs2_local_disk_chunk *dchunk; int mark_clean = 1, len; - int status; + int status = 0; iput(oinfo->dqi_gqinode); ocfs2_simple_drop_lockres(OCFS2_SB(sb), &oinfo->dqi_gqlock); @@ -853,17 +853,14 @@ static int ocfs2_local_free_info(struct super_block *sb, int type) oinfo->dqi_libh, olq_update_info, info); - if (status < 0) { + if (status < 0) mlog_errno(status); - goto out; - } - out: ocfs2_inode_unlock(sb_dqopt(sb)->files[type], 1); brelse(oinfo->dqi_libh); brelse(oinfo->dqi_lqi_bh); kfree(oinfo); - return 0; + return status; } static void olq_set_dquot(struct buffer_head *bh, void *private) diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 25b44b303b35..5d0cf59c4926 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -419,7 +419,7 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter) char *notes; size_t i = 0; - strlcpy(prpsinfo.pr_psargs, saved_command_line, + strscpy(prpsinfo.pr_psargs, saved_command_line, sizeof(prpsinfo.pr_psargs)); notes = kzalloc(notes_len, GFP_KERNEL); diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index bed3bb8b27fa..6aa9c2e1e8eb 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -17,8 +17,8 @@ #include <linux/fs.h> #include <linux/vfs.h> #include <linux/slab.h> +#include <linux/pagemap.h> #include <linux/string.h> -#include <linux/buffer_head.h> #include <linux/bio.h> #include "squashfs_fs.h" @@ -76,10 +76,101 @@ static int copy_bio_to_actor(struct bio *bio, return copied_bytes; } +static int squashfs_bio_read_cached(struct bio *fullbio, + struct address_space *cache_mapping, u64 index, int length, + u64 read_start, u64 read_end, int page_count) +{ + struct page *head_to_cache = NULL, *tail_to_cache = NULL; + struct block_device *bdev = fullbio->bi_bdev; + int start_idx = 0, end_idx = 0; + struct bvec_iter_all iter_all; + struct bio *bio = NULL; + struct bio_vec *bv; + int idx = 0; + int err = 0; + + bio_for_each_segment_all(bv, fullbio, iter_all) { + struct page *page = bv->bv_page; + + if (page->mapping == cache_mapping) { + idx++; + continue; + } + + /* + * We only use this when the device block size is the same as + * the page size, so read_start and read_end cover full pages. + * + * Compare these to the original required index and length to + * only cache pages which were requested partially, since these + * are the ones which are likely to be needed when reading + * adjacent blocks. + */ + if (idx == 0 && index != read_start) + head_to_cache = page; + else if (idx == page_count - 1 && index + length != read_end) + tail_to_cache = page; + + if (!bio || idx != end_idx) { + struct bio *new = bio_alloc_clone(bdev, fullbio, + GFP_NOIO, &fs_bio_set); + + if (bio) { + bio_trim(bio, start_idx * PAGE_SECTORS, + (end_idx - start_idx) * PAGE_SECTORS); + bio_chain(bio, new); + submit_bio(bio); + } + + bio = new; + start_idx = idx; + } + + idx++; + end_idx = idx; + } + + if (bio) { + bio_trim(bio, start_idx * PAGE_SECTORS, + (end_idx - start_idx) * PAGE_SECTORS); + err = submit_bio_wait(bio); + bio_put(bio); + } + + if (err) + return err; + + if (head_to_cache) { + int ret = add_to_page_cache_lru(head_to_cache, cache_mapping, + read_start >> PAGE_SHIFT, + GFP_NOIO); + + if (!ret) { + SetPageUptodate(head_to_cache); + unlock_page(head_to_cache); + } + + } + + if (tail_to_cache) { + int ret = add_to_page_cache_lru(tail_to_cache, cache_mapping, + (read_end >> PAGE_SHIFT) - 1, + GFP_NOIO); + + if (!ret) { + SetPageUptodate(tail_to_cache); + unlock_page(tail_to_cache); + } + } + + return 0; +} + static int squashfs_bio_read(struct super_block *sb, u64 index, int length, struct bio **biop, int *block_offset) { struct squashfs_sb_info *msblk = sb->s_fs_info; + struct address_space *cache_mapping = msblk->cache_mapping; const u64 read_start = round_down(index, msblk->devblksize); const sector_t block = read_start >> msblk->devblksize_log2; const u64 read_end = round_up(index + length, msblk->devblksize); @@ -99,21 +190,34 @@ static int squashfs_bio_read(struct super_block *sb, u64 index, int length, for (i = 0; i < page_count; ++i) { unsigned int len = min_t(unsigned int, PAGE_SIZE - offset, total_len); - struct page *page = alloc_page(GFP_NOIO); + struct page *page = NULL; + + if (cache_mapping) + page = find_get_page(cache_mapping, + (read_start >> PAGE_SHIFT) + i); + if (!page) + page = alloc_page(GFP_NOIO); if (!page) { error = -ENOMEM; goto out_free_bio; } - if (!bio_add_page(bio, page, len, offset)) { - error = -EIO; - goto out_free_bio; - } + + /* + * Use the __ version to avoid merging since we need each page + * to be separate when we check for and avoid cached pages. + */ + __bio_add_page(bio, page, len, offset); offset = 0; total_len -= len; } - error = submit_bio_wait(bio); + if (cache_mapping) + error = squashfs_bio_read_cached(bio, cache_mapping, index, + length, read_start, read_end, + page_count); + else + error = submit_bio_wait(bio); if (error) goto out_free_bio; diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c index 8893cb9b4198..a676084be27e 100644 --- a/fs/squashfs/decompressor.c +++ b/fs/squashfs/decompressor.c @@ -11,7 +11,6 @@ #include <linux/types.h> #include <linux/mutex.h> #include <linux/slab.h> -#include <linux/buffer_head.h> #include "squashfs_fs.h" #include "squashfs_fs_sb.h" diff --git a/fs/squashfs/decompressor_multi_percpu.c b/fs/squashfs/decompressor_multi_percpu.c index 1dfadf76ed9a..8a218e7c2390 100644 --- a/fs/squashfs/decompressor_multi_percpu.c +++ b/fs/squashfs/decompressor_multi_percpu.c @@ -7,7 +7,6 @@ #include <linux/types.h> #include <linux/slab.h> #include <linux/percpu.h> -#include <linux/buffer_head.h> #include <linux/local_lock.h> #include "squashfs_fs.h" diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index 72f6f4b37863..c01998eec146 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h @@ -47,6 +47,7 @@ struct squashfs_sb_info { struct squashfs_cache *block_cache; struct squashfs_cache *fragment_cache; struct squashfs_cache *read_page; + struct address_space *cache_mapping; int next_meta_index; __le64 *id_table; __le64 *fragment_index; diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index e090fae48e68..22e812808e5c 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -329,6 +329,19 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc) goto failed_mount; } + if (msblk->devblksize == PAGE_SIZE) { + struct inode *cache = new_inode(sb); + + if (cache == NULL) + goto failed_mount; + + set_nlink(cache, 1); + cache->i_size = OFFSET_MAX; + mapping_set_gfp_mask(cache->i_mapping, GFP_NOFS); + + msblk->cache_mapping = cache->i_mapping; + } + msblk->stream = squashfs_decompressor_setup(sb, flags); if (IS_ERR(msblk->stream)) { err = PTR_ERR(msblk->stream); @@ -454,6 +467,8 @@ failed_mount: squashfs_cache_delete(msblk->block_cache); squashfs_cache_delete(msblk->fragment_cache); squashfs_cache_delete(msblk->read_page); + if (msblk->cache_mapping) + iput(msblk->cache_mapping->host); msblk->thread_ops->destroy(msblk); kfree(msblk->inode_lookup_table); kfree(msblk->fragment_index); @@ -572,6 +587,8 @@ static void squashfs_put_super(struct super_block *sb) squashfs_cache_delete(sbi->block_cache); squashfs_cache_delete(sbi->fragment_cache); squashfs_cache_delete(sbi->read_page); + if (sbi->cache_mapping) + iput(sbi->cache_mapping->host); sbi->thread_ops->destroy(sbi); kfree(sbi->id_table); kfree(sbi->fragment_index); diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index 4050b191e1a9..6e794420bd39 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -87,10 +87,12 @@ struct bug_entry { * * Use the versions with printk format strings to provide better diagnostics. */ -#ifndef __WARN_FLAGS extern __printf(4, 5) void warn_slowpath_fmt(const char *file, const int line, unsigned taint, const char *fmt, ...); +extern __printf(1, 2) void __warn_printk(const char *fmt, ...); + +#ifndef __WARN_FLAGS #define __WARN() __WARN_printf(TAINT_WARN, NULL) #define __WARN_printf(taint, arg...) do { \ instrumentation_begin(); \ @@ -98,7 +100,6 @@ void warn_slowpath_fmt(const char *file, const int line, unsigned taint, instrumentation_end(); \ } while (0) #else -extern __printf(1, 2) void __warn_printk(const char *fmt, ...); #define __WARN() __WARN_FLAGS(BUGFLAG_TAINT(TAINT_WARN)) #define __WARN_printf(taint, arg...) do { \ instrumentation_begin(); \ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 5ef126a0a50f..fd8849ae4a8e 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -712,7 +712,6 @@ int acpi_match_platform_list(const struct acpi_platform_list *plat); extern void acpi_early_init(void); extern void acpi_subsystem_init(void); -extern void arch_post_acpi_subsys_init(void); extern int acpi_nvs_register(__u64 start, __u64 size); @@ -1084,6 +1083,8 @@ static inline bool acpi_sleep_state_supported(u8 sleep_state) #endif /* !CONFIG_ACPI */ +extern void arch_post_acpi_subsys_init(void); + #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC int acpi_ioapic_add(acpi_handle root); #else diff --git a/include/linux/delay.h b/include/linux/delay.h index 039e7e0c7378..ff9cda975e30 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -56,6 +56,7 @@ static inline void ndelay(unsigned long x) extern unsigned long lpj_fine; void calibrate_delay(void); +unsigned long calibrate_delay_is_known(void); void __attribute__((weak)) calibration_delay_done(void); void msleep(unsigned int msecs); unsigned long msleep_interruptible(unsigned int msecs); diff --git a/include/linux/init.h b/include/linux/init.h index c5fe6d26f5b1..1200fa99e848 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -152,6 +152,24 @@ extern unsigned int reset_devices; void setup_arch(char **); void prepare_namespace(void); void __init init_rootfs(void); + +void init_IRQ(void); +void time_init(void); +void mem_encrypt_init(void); +void poking_init(void); +void pgtable_cache_init(void); + +extern initcall_entry_t __initcall_start[]; +extern initcall_entry_t __initcall0_start[]; +extern initcall_entry_t __initcall1_start[]; +extern initcall_entry_t __initcall2_start[]; +extern initcall_entry_t __initcall3_start[]; +extern initcall_entry_t __initcall4_start[]; +extern initcall_entry_t __initcall5_start[]; +extern initcall_entry_t __initcall6_start[]; +extern initcall_entry_t __initcall7_start[]; +extern initcall_entry_t __initcall_end[]; + extern struct file_system_type rootfs_fs_type; #if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_STRICT_MODULE_RWX) @@ -309,6 +327,8 @@ struct obs_kernel_param { int early; }; +extern const struct obs_kernel_param __setup_start[], __setup_end[]; + /* * Only for really core code. See moduleparam.h for the normal way. * diff --git a/include/linux/kcov.h b/include/linux/kcov.h index ee04256f28af..b851ba415e03 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -72,6 +72,23 @@ static inline void kcov_remote_stop_softirq(void) kcov_remote_stop(); } +#ifdef CONFIG_64BIT +typedef unsigned long kcov_u64; +#else +typedef unsigned long long kcov_u64; +#endif + +void __sanitizer_cov_trace_pc(void); +void __sanitizer_cov_trace_cmp1(u8 arg1, u8 arg2); +void __sanitizer_cov_trace_cmp2(u16 arg1, u16 arg2); +void __sanitizer_cov_trace_cmp4(u32 arg1, u32 arg2); +void __sanitizer_cov_trace_cmp8(kcov_u64 arg1, kcov_u64 arg2); +void __sanitizer_cov_trace_const_cmp1(u8 arg1, u8 arg2); +void __sanitizer_cov_trace_const_cmp2(u16 arg1, u16 arg2); +void __sanitizer_cov_trace_const_cmp4(u32 arg1, u32 arg2); +void __sanitizer_cov_trace_const_cmp8(kcov_u64 arg1, kcov_u64 arg2); +void __sanitizer_cov_trace_switch(kcov_u64 val, void *cases); + #else static inline void kcov_task_init(struct task_struct *t) {} diff --git a/include/linux/math.h b/include/linux/math.h index 439b8f0b9ebd..2d388650c556 100644 --- a/include/linux/math.h +++ b/include/linux/math.h @@ -118,17 +118,17 @@ __STRUCT_FRACT(s32) __STRUCT_FRACT(u32) #undef __STRUCT_FRACT -/* - * Multiplies an integer by a fraction, while avoiding unnecessary - * overflow or loss of precision. - */ -#define mult_frac(x, numer, denom)( \ -{ \ - typeof(x) quot = (x) / (denom); \ - typeof(x) rem = (x) % (denom); \ - (quot * (numer)) + ((rem * (numer)) / (denom)); \ -} \ -) +/* Calculate "x * n / d" without unnecessary overflow or loss of precision. */ +#define mult_frac(x, n, d) \ +({ \ + typeof(x) x_ = (x); \ + typeof(n) n_ = (n); \ + typeof(d) d_ = (d); \ + \ + typeof(x_) q = x_ / d_; \ + typeof(x_) r = x_ % d_; \ + q * n_ + r * n_ / d_; \ +}) #define sector_div(a, b) do_div(a, b) diff --git a/include/linux/mm.h b/include/linux/mm.h index ae866bc9bad6..eef34f6a0351 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3491,13 +3491,12 @@ static inline bool debug_pagealloc_enabled_static(void) return static_branch_unlikely(&_debug_pagealloc_enabled); } -#ifdef CONFIG_DEBUG_PAGEALLOC /* * To support DEBUG_PAGEALLOC architecture must ensure that * __kernel_map_pages() never fails */ extern void __kernel_map_pages(struct page *page, int numpages, int enable); - +#ifdef CONFIG_DEBUG_PAGEALLOC static inline void debug_pagealloc_map_pages(struct page *page, int numpages) { if (debug_pagealloc_enabled_static()) diff --git a/include/linux/mount.h b/include/linux/mount.h index 4b81ea90440e..4f40b40306d0 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -123,4 +123,6 @@ extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, struct vfsmount *); extern void kern_unmount_array(struct vfsmount *mnt[], unsigned int num); +extern int cifs_root_data(char **dev, char **opts); + #endif /* _LINUX_MOUNT_H */ diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 048c0b9aa623..e3e6a64b98e0 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -7,19 +7,19 @@ #include <linux/sched.h> #include <asm/irq.h> -#if defined(CONFIG_HAVE_NMI_WATCHDOG) + +/* Arch specific watchdogs might need to share extra watchdog-related APIs. */ +#if defined(CONFIG_HARDLOCKUP_DETECTOR_ARCH) || defined(CONFIG_HARDLOCKUP_DETECTOR_SPARC64) #include <asm/nmi.h> #endif #ifdef CONFIG_LOCKUP_DETECTOR void lockup_detector_init(void); +void lockup_detector_retry_init(void); void lockup_detector_soft_poweroff(void); void lockup_detector_cleanup(void); -bool is_hardlockup(void); extern int watchdog_user_enabled; -extern int nmi_watchdog_user_enabled; -extern int soft_watchdog_user_enabled; extern int watchdog_thresh; extern unsigned long watchdog_enabled; @@ -35,6 +35,7 @@ extern int sysctl_hardlockup_all_cpu_backtrace; #else /* CONFIG_LOCKUP_DETECTOR */ static inline void lockup_detector_init(void) { } +static inline void lockup_detector_retry_init(void) { } static inline void lockup_detector_soft_poweroff(void) { } static inline void lockup_detector_cleanup(void) { } #endif /* !CONFIG_LOCKUP_DETECTOR */ @@ -69,17 +70,17 @@ static inline void reset_hung_task_detector(void) { } * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit - * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector. * - * 'watchdog_user_enabled', 'nmi_watchdog_user_enabled' and - * 'soft_watchdog_user_enabled' are variables that are only used as an + * 'watchdog_user_enabled', 'watchdog_hardlockup_user_enabled' and + * 'watchdog_softlockup_user_enabled' are variables that are only used as an * 'interface' between the parameters in /proc/sys/kernel and the internal * state bits in 'watchdog_enabled'. The 'watchdog_thresh' variable is * handled differently because its value is not boolean, and the lockup * detectors are 'suspended' while 'watchdog_thresh' is equal zero. */ -#define NMI_WATCHDOG_ENABLED_BIT 0 -#define SOFT_WATCHDOG_ENABLED_BIT 1 -#define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT) -#define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT) +#define WATCHDOG_HARDLOCKUP_ENABLED_BIT 0 +#define WATCHDOG_SOFTOCKUP_ENABLED_BIT 1 +#define WATCHDOG_HARDLOCKUP_ENABLED (1 << WATCHDOG_HARDLOCKUP_ENABLED_BIT) +#define WATCHDOG_SOFTOCKUP_ENABLED (1 << WATCHDOG_SOFTOCKUP_ENABLED_BIT) #if defined(CONFIG_HARDLOCKUP_DETECTOR) extern void hardlockup_detector_disable(void); @@ -88,52 +89,63 @@ extern unsigned int hardlockup_panic; static inline void hardlockup_detector_disable(void) {} #endif -#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) -# define NMI_WATCHDOG_SYSCTL_PERM 0644 +/* Sparc64 has special implemetantion that is always enabled. */ +#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HARDLOCKUP_DETECTOR_SPARC64) +void arch_touch_nmi_watchdog(void); #else -# define NMI_WATCHDOG_SYSCTL_PERM 0444 +static inline void arch_touch_nmi_watchdog(void) { } +#endif + +#if defined(CONFIG_HARDLOCKUP_DETECTOR_COUNTS_HRTIMER) +void watchdog_hardlockup_touch_cpu(unsigned int cpu); +void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs); #endif #if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) -extern void arch_touch_nmi_watchdog(void); extern void hardlockup_detector_perf_stop(void); extern void hardlockup_detector_perf_restart(void); -extern void hardlockup_detector_perf_disable(void); -extern void hardlockup_detector_perf_enable(void); extern void hardlockup_detector_perf_cleanup(void); -extern int hardlockup_detector_perf_init(void); #else static inline void hardlockup_detector_perf_stop(void) { } static inline void hardlockup_detector_perf_restart(void) { } -static inline void hardlockup_detector_perf_disable(void) { } -static inline void hardlockup_detector_perf_enable(void) { } static inline void hardlockup_detector_perf_cleanup(void) { } -# if !defined(CONFIG_HAVE_NMI_WATCHDOG) -static inline int hardlockup_detector_perf_init(void) { return -ENODEV; } -static inline void arch_touch_nmi_watchdog(void) {} -# else -static inline int hardlockup_detector_perf_init(void) { return 0; } -# endif #endif -void watchdog_nmi_stop(void); -void watchdog_nmi_start(void); -int watchdog_nmi_probe(void); -int watchdog_nmi_enable(unsigned int cpu); -void watchdog_nmi_disable(unsigned int cpu); +void watchdog_hardlockup_stop(void); +void watchdog_hardlockup_start(void); +int watchdog_hardlockup_probe(void); +void watchdog_hardlockup_enable(unsigned int cpu); +void watchdog_hardlockup_disable(unsigned int cpu); void lockup_detector_reconfigure(void); +#ifdef CONFIG_HARDLOCKUP_DETECTOR_BUDDY +void watchdog_buddy_check_hardlockup(int hrtimer_interrupts); +#else +static inline void watchdog_buddy_check_hardlockup(int hrtimer_interrupts) {} +#endif + /** - * touch_nmi_watchdog - restart NMI watchdog timeout. + * touch_nmi_watchdog - manually reset the hardlockup watchdog timeout. * - * If the architecture supports the NMI watchdog, touch_nmi_watchdog() - * may be used to reset the timeout - for code which intentionally - * disables interrupts for a long time. This call is stateless. + * If we support detecting hardlockups, touch_nmi_watchdog() may be + * used to pet the watchdog (reset the timeout) - for code which + * intentionally disables interrupts for a long time. This call is stateless. + * + * Though this function has "nmi" in the name, the hardlockup watchdog might + * not be backed by NMIs. This function will likely be renamed to + * touch_hardlockup_watchdog() in the future. */ static inline void touch_nmi_watchdog(void) { + /* + * Pass on to the hardlockup detector selected via CONFIG_. Note that + * the hardlockup detector may not be arch-specific nor using NMIs + * and the arch_touch_nmi_watchdog() function will likely be renamed + * in the future. + */ arch_touch_nmi_watchdog(); + touch_softlockup_watchdog(); } @@ -194,10 +206,11 @@ static inline bool trigger_single_cpu_backtrace(int cpu) #ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF u64 hw_nmi_get_sample_period(int watchdog_thresh); +bool arch_perf_nmi_is_available(void); #endif #if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \ - defined(CONFIG_HARDLOCKUP_DETECTOR) + defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) void watchdog_update_hrtimer_threshold(u64 period); #else static inline void watchdog_update_hrtimer_threshold(u64 period) { } diff --git a/include/linux/panic.h b/include/linux/panic.h index 979b776e3bcb..6717b15e798c 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -32,6 +32,9 @@ extern int sysctl_panic_on_stackoverflow; extern bool crash_kexec_post_notifiers; +extern void __stack_chk_fail(void); +void abort(void); + /* * panic_cpu is used for synchronizing panic() and crash_kexec() execution. It * holds a CPU number which is executing panic() currently. A value of diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 1338ea2aa720..42125cf9c506 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -103,12 +103,10 @@ extern void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai); extern void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, void *base_addr); -#ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size, size_t atom_size, pcpu_fc_cpu_distance_fn_t cpu_distance_fn, pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn); -#endif #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK void __init pcpu_populate_pte(unsigned long addr); diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index c0e4baf940dc..a0801f68762b 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -173,6 +173,8 @@ void kvm_host_pmu_init(struct arm_pmu *pmu); #define kvm_host_pmu_init(x) do { } while(0) #endif +bool arm_pmu_irq_is_nmi(void); + /* Internal functions only for core arm_pmu code */ struct arm_pmu *armpmu_alloc(void); void armpmu_free(struct arm_pmu *pmu); diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index c02646884fa8..9ea0b28068f4 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -256,6 +256,11 @@ check_copy_size(const void *addr, size_t bytes, bool is_source) static inline void arch_setup_new_exec(void) { } #endif +void arch_task_cache_init(void); /* for CONFIG_SH */ +void arch_release_task_struct(struct task_struct *tsk); +int arch_dup_task_struct(struct task_struct *dst, + struct task_struct *src); + #endif /* __KERNEL__ */ #endif /* _LINUX_THREAD_INFO_H */ diff --git a/include/linux/types.h b/include/linux/types.h index becb8cd5916f..253168bb3fe1 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -40,6 +40,7 @@ typedef __kernel_uid16_t uid16_t; typedef __kernel_gid16_t gid16_t; typedef unsigned long uintptr_t; +typedef long intptr_t; #ifdef CONFIG_HAVE_UID16 /* This is defined by include/asm-{arch}/posix_types.h */ diff --git a/init/do_mounts.c b/init/do_mounts.c index 2a970f6ac68f..1aa015883519 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -192,8 +192,19 @@ retry: printk("VFS: Cannot open root device \"%s\" or %s: error %d\n", pretty_name, b, err); printk("Please append a correct \"root=\" boot option; here are the available partitions:\n"); - printk_all_partitions(); + + if (root_fs_names) + num_fs = list_bdev_fs_names(fs_names, PAGE_SIZE); + if (!num_fs) + pr_err("Can't find any bdev filesystem to be used for mount!\n"); + else { + pr_err("List of all bdev filesystems:\n"); + for (i = 0, p = fs_names; i < num_fs; i++, p += strlen(p)+1) + pr_err(" %s", p); + pr_err("\n"); + } + panic("VFS: Unable to mount root fs on %s", b); } if (!(flags & SB_RDONLY)) { @@ -256,8 +267,6 @@ static inline void mount_nfs_root(void) #ifdef CONFIG_CIFS_ROOT -extern int cifs_root_data(char **dev, char **opts); - #define CIFSROOT_TIMEOUT_MIN 5 #define CIFSROOT_TIMEOUT_MAX 30 #define CIFSROOT_RETRY_MAX 5 diff --git a/init/main.c b/init/main.c index 0d2cceff9993..ad920fac325c 100644 --- a/init/main.c +++ b/init/main.c @@ -113,10 +113,6 @@ static int kernel_init(void *); -extern void init_IRQ(void); -extern void radix_tree_init(void); -extern void maple_tree_init(void); - /* * Debug helper: via this flag we know that we are in 'early bootup code' * where only the boot processor is running with IRQ disabled. This means @@ -135,7 +131,6 @@ EXPORT_SYMBOL(system_state); #define MAX_INIT_ARGS CONFIG_INIT_ENV_ARG_LIMIT #define MAX_INIT_ENVS CONFIG_INIT_ENV_ARG_LIMIT -extern void time_init(void); /* Default late time init is NULL. archs can override this later. */ void (*__initdata late_time_init)(void); @@ -194,8 +189,6 @@ static const char *argv_init[MAX_INIT_ARGS+2] = { "init", NULL, }; const char *envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, }; static const char *panic_later, *panic_param; -extern const struct obs_kernel_param __setup_start[], __setup_end[]; - static bool __init obsolete_checksetup(char *line) { const struct obs_kernel_param *p; @@ -1256,17 +1249,6 @@ int __init_or_module do_one_initcall(initcall_t fn) } -extern initcall_entry_t __initcall_start[]; -extern initcall_entry_t __initcall0_start[]; -extern initcall_entry_t __initcall1_start[]; -extern initcall_entry_t __initcall2_start[]; -extern initcall_entry_t __initcall3_start[]; -extern initcall_entry_t __initcall4_start[]; -extern initcall_entry_t __initcall5_start[]; -extern initcall_entry_t __initcall6_start[]; -extern initcall_entry_t __initcall7_start[]; -extern initcall_entry_t __initcall_end[]; - static initcall_entry_t *initcall_levels[] __initdata = { __initcall0_start, __initcall1_start, diff --git a/kernel/Makefile b/kernel/Makefile index b69c95315480..f9e3fd9195d9 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -91,7 +91,8 @@ obj-$(CONFIG_FAIL_FUNCTION) += fail_function.o obj-$(CONFIG_KGDB) += debug/ obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o -obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o +obj-$(CONFIG_HARDLOCKUP_DETECTOR_BUDDY) += watchdog_buddy.o +obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_perf.o obj-$(CONFIG_SECCOMP) += seccomp.o obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o diff --git a/kernel/fork.c b/kernel/fork.c index 41c964104b58..b85814e614a5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -252,23 +252,19 @@ static int memcg_charge_kernel_stack(struct vm_struct *vm) { int i; int ret; + int nr_charged = 0; - BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0); BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE); for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) { ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL, 0); if (ret) goto err; + nr_charged++; } return 0; err: - /* - * If memcg_kmem_charge_page() fails, page's memory cgroup pointer is - * NULL, and memcg_kmem_uncharge_page() in free_thread_stack() will - * ignore this page. - */ - for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) + for (i = 0; i < nr_charged; i++) memcg_kmem_uncharge_page(vm->pages[i], 0); return ret; } diff --git a/kernel/kcov.c b/kernel/kcov.c index 84c717337df0..f9ac2e9e460f 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -279,7 +279,7 @@ void notrace __sanitizer_cov_trace_cmp4(u32 arg1, u32 arg2) } EXPORT_SYMBOL(__sanitizer_cov_trace_cmp4); -void notrace __sanitizer_cov_trace_cmp8(u64 arg1, u64 arg2) +void notrace __sanitizer_cov_trace_cmp8(kcov_u64 arg1, kcov_u64 arg2) { write_comp_data(KCOV_CMP_SIZE(3), arg1, arg2, _RET_IP_); } @@ -306,16 +306,17 @@ void notrace __sanitizer_cov_trace_const_cmp4(u32 arg1, u32 arg2) } EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp4); -void notrace __sanitizer_cov_trace_const_cmp8(u64 arg1, u64 arg2) +void notrace __sanitizer_cov_trace_const_cmp8(kcov_u64 arg1, kcov_u64 arg2) { write_comp_data(KCOV_CMP_SIZE(3) | KCOV_CMP_CONST, arg1, arg2, _RET_IP_); } EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp8); -void notrace __sanitizer_cov_trace_switch(u64 val, u64 *cases) +void notrace __sanitizer_cov_trace_switch(kcov_u64 val, void *arg) { u64 i; + u64 *cases = arg; u64 count = cases[0]; u64 size = cases[1]; u64 type = KCOV_CMP_CONST; diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 3d578c6fefee..e2f2574d8b74 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -1091,6 +1091,11 @@ __bpf_kfunc void crash_kexec(struct pt_regs *regs) } } +static inline resource_size_t crash_resource_size(const struct resource *res) +{ + return !res->end ? 0 : resource_size(res); +} + ssize_t crash_get_memory_size(void) { ssize_t size = 0; @@ -1098,19 +1103,45 @@ ssize_t crash_get_memory_size(void) if (!kexec_trylock()) return -EBUSY; - if (crashk_res.end != crashk_res.start) - size = resource_size(&crashk_res); + size += crash_resource_size(&crashk_res); + size += crash_resource_size(&crashk_low_res); kexec_unlock(); return size; } +static int __crash_shrink_memory(struct resource *old_res, + unsigned long new_size) +{ + struct resource *ram_res; + + ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL); + if (!ram_res) + return -ENOMEM; + + ram_res->start = old_res->start + new_size; + ram_res->end = old_res->end; + ram_res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM; + ram_res->name = "System RAM"; + + if (!new_size) { + release_resource(old_res); + old_res->start = 0; + old_res->end = 0; + } else { + crashk_res.end = ram_res->start - 1; + } + + crash_free_reserved_phys_range(ram_res->start, ram_res->end); + insert_resource(&iomem_resource, ram_res); + + return 0; +} + int crash_shrink_memory(unsigned long new_size) { int ret = 0; - unsigned long start, end; - unsigned long old_size; - struct resource *ram_res; + unsigned long old_size, low_size; if (!kexec_trylock()) return -EBUSY; @@ -1119,36 +1150,42 @@ int crash_shrink_memory(unsigned long new_size) ret = -ENOENT; goto unlock; } - start = crashk_res.start; - end = crashk_res.end; - old_size = (end == 0) ? 0 : end - start + 1; + + low_size = crash_resource_size(&crashk_low_res); + old_size = crash_resource_size(&crashk_res) + low_size; + new_size = roundup(new_size, KEXEC_CRASH_MEM_ALIGN); if (new_size >= old_size) { ret = (new_size == old_size) ? 0 : -EINVAL; goto unlock; } - ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL); - if (!ram_res) { - ret = -ENOMEM; - goto unlock; - } - - start = roundup(start, KEXEC_CRASH_MEM_ALIGN); - end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN); - - crash_free_reserved_phys_range(end, crashk_res.end); - - if ((start == end) && (crashk_res.parent != NULL)) - release_resource(&crashk_res); - - ram_res->start = end; - ram_res->end = crashk_res.end; - ram_res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM; - ram_res->name = "System RAM"; + /* + * (low_size > new_size) implies that low_size is greater than zero. + * This also means that if low_size is zero, the else branch is taken. + * + * If low_size is greater than 0, (low_size > new_size) indicates that + * crashk_low_res also needs to be shrunken. Otherwise, only crashk_res + * needs to be shrunken. + */ + if (low_size > new_size) { + ret = __crash_shrink_memory(&crashk_res, 0); + if (ret) + goto unlock; - crashk_res.end = end - 1; + ret = __crash_shrink_memory(&crashk_low_res, new_size); + } else { + ret = __crash_shrink_memory(&crashk_res, new_size - low_size); + } - insert_resource(&iomem_resource, ram_res); + /* Swap crashk_res and crashk_low_res if needed */ + if (!crashk_res.end && crashk_low_res.end) { + crashk_res.start = crashk_low_res.start; + crashk_res.end = crashk_low_res.end; + release_resource(&crashk_low_res); + crashk_low_res.start = 0; + crashk_low_res.end = 0; + insert_resource(&iomem_resource, &crashk_res); + } unlock: kexec_unlock(); diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 69ee4a29136f..881ba0d1714c 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -867,6 +867,7 @@ static int kexec_purgatory_setup_sechdrs(struct purgatory_info *pi, { unsigned long bss_addr; unsigned long offset; + size_t sechdrs_size; Elf_Shdr *sechdrs; int i; @@ -874,11 +875,11 @@ static int kexec_purgatory_setup_sechdrs(struct purgatory_info *pi, * The section headers in kexec_purgatory are read-only. In order to * have them modifiable make a temporary copy. */ - sechdrs = vzalloc(array_size(sizeof(Elf_Shdr), pi->ehdr->e_shnum)); + sechdrs_size = array_size(sizeof(Elf_Shdr), pi->ehdr->e_shnum); + sechdrs = vzalloc(sechdrs_size); if (!sechdrs) return -ENOMEM; - memcpy(sechdrs, (void *)pi->ehdr + pi->ehdr->e_shoff, - pi->ehdr->e_shnum * sizeof(Elf_Shdr)); + memcpy(sechdrs, (void *)pi->ehdr + pi->ehdr->e_shoff, sechdrs_size); pi->sechdrs = sechdrs; offset = 0; diff --git a/kernel/kthread.c b/kernel/kthread.c index 07a057086d26..4fff7df17a68 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -322,10 +322,10 @@ void __noreturn kthread_exit(long result) * @comp: Completion to complete * @code: The integer value to return to kthread_stop(). * - * If present complete @comp and the reuturn code to kthread_stop(). + * If present, complete @comp and then return code to kthread_stop(). * * A kernel thread whose module may be removed after the completion of - * @comp can use this function exit safely. + * @comp can use this function to exit safely. * * Does not return. */ diff --git a/kernel/locking/lock_events.h b/kernel/locking/lock_events.h index 8c7e7d25f09c..a6016b91803d 100644 --- a/kernel/locking/lock_events.h +++ b/kernel/locking/lock_events.h @@ -57,4 +57,8 @@ static inline void __lockevent_add(enum lock_events event, int inc) #define lockevent_cond_inc(ev, c) #endif /* CONFIG_LOCK_EVENT_COUNTS */ + +ssize_t lockevent_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos); + #endif /* __LOCKING_LOCK_EVENTS_H */ diff --git a/kernel/panic.c b/kernel/panic.c index 886d2ebd0a0d..10effe40a3fa 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -684,6 +684,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint, add_taint(taint, LOCKDEP_STILL_OK); } +#ifdef CONFIG_BUG #ifndef __WARN_FLAGS void warn_slowpath_fmt(const char *file, int line, unsigned taint, const char *fmt, ...) @@ -722,8 +723,6 @@ void __warn_printk(const char *fmt, ...) EXPORT_SYMBOL(__warn_printk); #endif -#ifdef CONFIG_BUG - /* Support resetting WARN*_ONCE state */ static int clear_warn_once_set(void *data, u64 val) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 8e61f21e7e33..be38276a365f 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -29,20 +29,18 @@ static DEFINE_MUTEX(watchdog_mutex); -#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG) -# define WATCHDOG_DEFAULT (SOFT_WATCHDOG_ENABLED | NMI_WATCHDOG_ENABLED) -# define NMI_WATCHDOG_DEFAULT 1 +#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HARDLOCKUP_DETECTOR_SPARC64) +# define WATCHDOG_HARDLOCKUP_DEFAULT 1 #else -# define WATCHDOG_DEFAULT (SOFT_WATCHDOG_ENABLED) -# define NMI_WATCHDOG_DEFAULT 0 +# define WATCHDOG_HARDLOCKUP_DEFAULT 0 #endif unsigned long __read_mostly watchdog_enabled; int __read_mostly watchdog_user_enabled = 1; -int __read_mostly nmi_watchdog_user_enabled = NMI_WATCHDOG_DEFAULT; -int __read_mostly soft_watchdog_user_enabled = 1; +static int __read_mostly watchdog_hardlockup_user_enabled = WATCHDOG_HARDLOCKUP_DEFAULT; +static int __read_mostly watchdog_softlockup_user_enabled = 1; int __read_mostly watchdog_thresh = 10; -static int __read_mostly nmi_watchdog_available; +static int __read_mostly watchdog_hardlockup_available; struct cpumask watchdog_cpumask __read_mostly; unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); @@ -68,7 +66,7 @@ unsigned int __read_mostly hardlockup_panic = */ void __init hardlockup_detector_disable(void) { - nmi_watchdog_user_enabled = 0; + watchdog_hardlockup_user_enabled = 0; } static int __init hardlockup_panic_setup(char *str) @@ -78,54 +76,163 @@ static int __init hardlockup_panic_setup(char *str) else if (!strncmp(str, "nopanic", 7)) hardlockup_panic = 0; else if (!strncmp(str, "0", 1)) - nmi_watchdog_user_enabled = 0; + watchdog_hardlockup_user_enabled = 0; else if (!strncmp(str, "1", 1)) - nmi_watchdog_user_enabled = 1; + watchdog_hardlockup_user_enabled = 1; return 1; } __setup("nmi_watchdog=", hardlockup_panic_setup); #endif /* CONFIG_HARDLOCKUP_DETECTOR */ -/* - * These functions can be overridden if an architecture implements its - * own hardlockup detector. - * - * watchdog_nmi_enable/disable can be implemented to start and stop when - * softlockup watchdog start and stop. The arch must select the - * SOFTLOCKUP_DETECTOR Kconfig. - */ -int __weak watchdog_nmi_enable(unsigned int cpu) +#if defined(CONFIG_HARDLOCKUP_DETECTOR_COUNTS_HRTIMER) + +static DEFINE_PER_CPU(atomic_t, hrtimer_interrupts); +static DEFINE_PER_CPU(int, hrtimer_interrupts_saved); +static DEFINE_PER_CPU(bool, watchdog_hardlockup_warned); +static DEFINE_PER_CPU(bool, watchdog_hardlockup_touched); +static unsigned long watchdog_hardlockup_all_cpu_dumped; + +notrace void arch_touch_nmi_watchdog(void) { - hardlockup_detector_perf_enable(); - return 0; + /* + * Using __raw here because some code paths have + * preemption enabled. If preemption is enabled + * then interrupts should be enabled too, in which + * case we shouldn't have to worry about the watchdog + * going off. + */ + raw_cpu_write(watchdog_hardlockup_touched, true); +} +EXPORT_SYMBOL(arch_touch_nmi_watchdog); + +void watchdog_hardlockup_touch_cpu(unsigned int cpu) +{ + per_cpu(watchdog_hardlockup_touched, cpu) = true; } -void __weak watchdog_nmi_disable(unsigned int cpu) +static bool is_hardlockup(unsigned int cpu) { - hardlockup_detector_perf_disable(); + int hrint = atomic_read(&per_cpu(hrtimer_interrupts, cpu)); + + if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint) + return true; + + /* + * NOTE: we don't need any fancy atomic_t or READ_ONCE/WRITE_ONCE + * for hrtimer_interrupts_saved. hrtimer_interrupts_saved is + * written/read by a single CPU. + */ + per_cpu(hrtimer_interrupts_saved, cpu) = hrint; + + return false; } -/* Return 0, if a NMI watchdog is available. Error code otherwise */ -int __weak __init watchdog_nmi_probe(void) +static void watchdog_hardlockup_kick(void) { - return hardlockup_detector_perf_init(); + int new_interrupts; + + new_interrupts = atomic_inc_return(this_cpu_ptr(&hrtimer_interrupts)); + watchdog_buddy_check_hardlockup(new_interrupts); +} + +void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs) +{ + if (per_cpu(watchdog_hardlockup_touched, cpu)) { + per_cpu(watchdog_hardlockup_touched, cpu) = false; + return; + } + + /* + * Check for a hardlockup by making sure the CPU's timer + * interrupt is incrementing. The timer interrupt should have + * fired multiple times before we overflow'd. If it hasn't + * then this is a good indication the cpu is stuck + */ + if (is_hardlockup(cpu)) { + unsigned int this_cpu = smp_processor_id(); + struct cpumask backtrace_mask; + + cpumask_copy(&backtrace_mask, cpu_online_mask); + + /* Only print hardlockups once. */ + if (per_cpu(watchdog_hardlockup_warned, cpu)) + return; + + pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n", cpu); + print_modules(); + print_irqtrace_events(current); + if (cpu == this_cpu) { + if (regs) + show_regs(regs); + else + dump_stack(); + cpumask_clear_cpu(cpu, &backtrace_mask); + } else { + if (trigger_single_cpu_backtrace(cpu)) + cpumask_clear_cpu(cpu, &backtrace_mask); + } + + /* + * Perform multi-CPU dump only once to avoid multiple + * hardlockups generating interleaving traces + */ + if (sysctl_hardlockup_all_cpu_backtrace && + !test_and_set_bit(0, &watchdog_hardlockup_all_cpu_dumped)) + trigger_cpumask_backtrace(&backtrace_mask); + + if (hardlockup_panic) + nmi_panic(regs, "Hard LOCKUP"); + + per_cpu(watchdog_hardlockup_warned, cpu) = true; + } else { + per_cpu(watchdog_hardlockup_warned, cpu) = false; + } +} + +#else /* CONFIG_HARDLOCKUP_DETECTOR_COUNTS_HRTIMER */ + +static inline void watchdog_hardlockup_kick(void) { } + +#endif /* !CONFIG_HARDLOCKUP_DETECTOR_COUNTS_HRTIMER */ + +/* + * These functions can be overridden based on the configured hardlockdup detector. + * + * watchdog_hardlockup_enable/disable can be implemented to start and stop when + * softlockup watchdog start and stop. The detector must select the + * SOFTLOCKUP_DETECTOR Kconfig. + */ +void __weak watchdog_hardlockup_enable(unsigned int cpu) { } + +void __weak watchdog_hardlockup_disable(unsigned int cpu) { } + +/* + * Watchdog-detector specific API. + * + * Return 0 when hardlockup watchdog is available, negative value otherwise. + * Note that the negative value means that a delayed probe might + * succeed later. + */ +int __weak __init watchdog_hardlockup_probe(void) +{ + return -ENODEV; } /** - * watchdog_nmi_stop - Stop the watchdog for reconfiguration + * watchdog_hardlockup_stop - Stop the watchdog for reconfiguration * * The reconfiguration steps are: - * watchdog_nmi_stop(); + * watchdog_hardlockup_stop(); * update_variables(); - * watchdog_nmi_start(); + * watchdog_hardlockup_start(); */ -void __weak watchdog_nmi_stop(void) { } +void __weak watchdog_hardlockup_stop(void) { } /** - * watchdog_nmi_start - Start the watchdog after reconfiguration + * watchdog_hardlockup_start - Start the watchdog after reconfiguration * - * Counterpart to watchdog_nmi_stop(). + * Counterpart to watchdog_hardlockup_stop(). * * The following variables have been updated in update_variables() and * contain the currently valid configuration: @@ -133,23 +240,23 @@ void __weak watchdog_nmi_stop(void) { } * - watchdog_thresh * - watchdog_cpumask */ -void __weak watchdog_nmi_start(void) { } +void __weak watchdog_hardlockup_start(void) { } /** * lockup_detector_update_enable - Update the sysctl enable bit * - * Caller needs to make sure that the NMI/perf watchdogs are off, so this - * can't race with watchdog_nmi_disable(). + * Caller needs to make sure that the hard watchdogs are off, so this + * can't race with watchdog_hardlockup_disable(). */ static void lockup_detector_update_enable(void) { watchdog_enabled = 0; if (!watchdog_user_enabled) return; - if (nmi_watchdog_available && nmi_watchdog_user_enabled) - watchdog_enabled |= NMI_WATCHDOG_ENABLED; - if (soft_watchdog_user_enabled) - watchdog_enabled |= SOFT_WATCHDOG_ENABLED; + if (watchdog_hardlockup_available && watchdog_hardlockup_user_enabled) + watchdog_enabled |= WATCHDOG_HARDLOCKUP_ENABLED; + if (watchdog_softlockup_user_enabled) + watchdog_enabled |= WATCHDOG_SOFTOCKUP_ENABLED; } #ifdef CONFIG_SOFTLOCKUP_DETECTOR @@ -179,8 +286,6 @@ static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); static DEFINE_PER_CPU(unsigned long, watchdog_report_ts); static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer); static DEFINE_PER_CPU(bool, softlockup_touch_sync); -static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts); -static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); static unsigned long soft_lockup_nmi_warn; static int __init nowatchdog_setup(char *str) @@ -192,7 +297,7 @@ __setup("nowatchdog", nowatchdog_setup); static int __init nosoftlockup_setup(char *str) { - soft_watchdog_user_enabled = 0; + watchdog_softlockup_user_enabled = 0; return 1; } __setup("nosoftlockup", nosoftlockup_setup); @@ -306,7 +411,7 @@ static int is_softlockup(unsigned long touch_ts, unsigned long period_ts, unsigned long now) { - if ((watchdog_enabled & SOFT_WATCHDOG_ENABLED) && watchdog_thresh){ + if ((watchdog_enabled & WATCHDOG_SOFTOCKUP_ENABLED) && watchdog_thresh) { /* Warn about unreasonable delays. */ if (time_after(now, period_ts + get_softlockup_thresh())) return now - touch_ts; @@ -315,22 +420,6 @@ static int is_softlockup(unsigned long touch_ts, } /* watchdog detector functions */ -bool is_hardlockup(void) -{ - unsigned long hrint = __this_cpu_read(hrtimer_interrupts); - - if (__this_cpu_read(hrtimer_interrupts_saved) == hrint) - return true; - - __this_cpu_write(hrtimer_interrupts_saved, hrint); - return false; -} - -static void watchdog_interrupt_count(void) -{ - __this_cpu_inc(hrtimer_interrupts); -} - static DEFINE_PER_CPU(struct completion, softlockup_completion); static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work); @@ -361,8 +450,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) if (!watchdog_enabled) return HRTIMER_NORESTART; - /* kick the hardlockup detector */ - watchdog_interrupt_count(); + watchdog_hardlockup_kick(); /* kick the softlockup detector */ if (completion_done(this_cpu_ptr(&softlockup_completion))) { @@ -458,7 +546,7 @@ static void watchdog_enable(unsigned int cpu) complete(done); /* - * Start the timer first to prevent the NMI watchdog triggering + * Start the timer first to prevent the hardlockup watchdog triggering * before the timer has a chance to fire. */ hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); @@ -468,9 +556,9 @@ static void watchdog_enable(unsigned int cpu) /* Initialize timestamp */ update_touch_ts(); - /* Enable the perf event */ - if (watchdog_enabled & NMI_WATCHDOG_ENABLED) - watchdog_nmi_enable(cpu); + /* Enable the hardlockup detector */ + if (watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED) + watchdog_hardlockup_enable(cpu); } static void watchdog_disable(unsigned int cpu) @@ -480,11 +568,11 @@ static void watchdog_disable(unsigned int cpu) WARN_ON_ONCE(cpu != smp_processor_id()); /* - * Disable the perf event first. That prevents that a large delay - * between disabling the timer and disabling the perf event causes - * the perf NMI to detect a false positive. + * Disable the hardlockup detector first. That prevents that a large + * delay between disabling the timer and disabling the hardlockup + * detector causes a false positive. */ - watchdog_nmi_disable(cpu); + watchdog_hardlockup_disable(cpu); hrtimer_cancel(hrtimer); wait_for_completion(this_cpu_ptr(&softlockup_completion)); } @@ -540,7 +628,7 @@ int lockup_detector_offline_cpu(unsigned int cpu) static void __lockup_detector_reconfigure(void) { cpus_read_lock(); - watchdog_nmi_stop(); + watchdog_hardlockup_stop(); softlockup_stop_all(); set_sample_period(); @@ -548,7 +636,7 @@ static void __lockup_detector_reconfigure(void) if (watchdog_enabled && watchdog_thresh) softlockup_start_all(); - watchdog_nmi_start(); + watchdog_hardlockup_start(); cpus_read_unlock(); /* * Must be called outside the cpus locked section to prevent @@ -589,9 +677,9 @@ static __init void lockup_detector_setup(void) static void __lockup_detector_reconfigure(void) { cpus_read_lock(); - watchdog_nmi_stop(); + watchdog_hardlockup_stop(); lockup_detector_update_enable(); - watchdog_nmi_start(); + watchdog_hardlockup_start(); cpus_read_unlock(); } void lockup_detector_reconfigure(void) @@ -646,14 +734,14 @@ static void proc_watchdog_update(void) /* * common function for watchdog, nmi_watchdog and soft_watchdog parameter * - * caller | table->data points to | 'which' - * -------------------|----------------------------|-------------------------- - * proc_watchdog | watchdog_user_enabled | NMI_WATCHDOG_ENABLED | - * | | SOFT_WATCHDOG_ENABLED - * -------------------|----------------------------|-------------------------- - * proc_nmi_watchdog | nmi_watchdog_user_enabled | NMI_WATCHDOG_ENABLED - * -------------------|----------------------------|-------------------------- - * proc_soft_watchdog | soft_watchdog_user_enabled | SOFT_WATCHDOG_ENABLED + * caller | table->data points to | 'which' + * -------------------|----------------------------------|------------------------------- + * proc_watchdog | watchdog_user_enabled | WATCHDOG_HARDLOCKUP_ENABLED | + * | | WATCHDOG_SOFTOCKUP_ENABLED + * -------------------|----------------------------------|------------------------------- + * proc_nmi_watchdog | watchdog_hardlockup_user_enabled | WATCHDOG_HARDLOCKUP_ENABLED + * -------------------|----------------------------------|------------------------------- + * proc_soft_watchdog | watchdog_softlockup_user_enabled | WATCHDOG_SOFTOCKUP_ENABLED */ static int proc_watchdog_common(int which, struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) @@ -685,7 +773,8 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write, int proc_watchdog(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { - return proc_watchdog_common(NMI_WATCHDOG_ENABLED|SOFT_WATCHDOG_ENABLED, + return proc_watchdog_common(WATCHDOG_HARDLOCKUP_ENABLED | + WATCHDOG_SOFTOCKUP_ENABLED, table, write, buffer, lenp, ppos); } @@ -695,9 +784,9 @@ int proc_watchdog(struct ctl_table *table, int write, int proc_nmi_watchdog(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { - if (!nmi_watchdog_available && write) + if (!watchdog_hardlockup_available && write) return -ENOTSUPP; - return proc_watchdog_common(NMI_WATCHDOG_ENABLED, + return proc_watchdog_common(WATCHDOG_HARDLOCKUP_ENABLED, table, write, buffer, lenp, ppos); } @@ -707,7 +796,7 @@ int proc_nmi_watchdog(struct ctl_table *table, int write, int proc_soft_watchdog(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { - return proc_watchdog_common(SOFT_WATCHDOG_ENABLED, + return proc_watchdog_common(WATCHDOG_SOFTOCKUP_ENABLED, table, write, buffer, lenp, ppos); } @@ -774,15 +863,6 @@ static struct ctl_table watchdog_sysctls[] = { .extra2 = (void *)&sixty, }, { - .procname = "nmi_watchdog", - .data = &nmi_watchdog_user_enabled, - .maxlen = sizeof(int), - .mode = NMI_WATCHDOG_SYSCTL_PERM, - .proc_handler = proc_nmi_watchdog, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, - { .procname = "watchdog_cpumask", .data = &watchdog_cpumask_bits, .maxlen = NR_CPUS, @@ -792,7 +872,7 @@ static struct ctl_table watchdog_sysctls[] = { #ifdef CONFIG_SOFTLOCKUP_DETECTOR { .procname = "soft_watchdog", - .data = &soft_watchdog_user_enabled, + .data = &watchdog_softlockup_user_enabled, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_soft_watchdog, @@ -845,14 +925,90 @@ static struct ctl_table watchdog_sysctls[] = { {} }; +static struct ctl_table watchdog_hardlockup_sysctl[] = { + { + .procname = "nmi_watchdog", + .data = &watchdog_hardlockup_user_enabled, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = proc_nmi_watchdog, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + {} +}; + static void __init watchdog_sysctl_init(void) { register_sysctl_init("kernel", watchdog_sysctls); + + if (watchdog_hardlockup_available) + watchdog_hardlockup_sysctl[0].mode = 0644; + register_sysctl_init("kernel", watchdog_hardlockup_sysctl); } + #else #define watchdog_sysctl_init() do { } while (0) #endif /* CONFIG_SYSCTL */ +static void __init lockup_detector_delay_init(struct work_struct *work); +static bool allow_lockup_detector_init_retry __initdata; + +static struct work_struct detector_work __initdata = + __WORK_INITIALIZER(detector_work, lockup_detector_delay_init); + +static void __init lockup_detector_delay_init(struct work_struct *work) +{ + int ret; + + ret = watchdog_hardlockup_probe(); + if (ret) { + pr_info("Delayed init of the lockup detector failed: %d\n", ret); + pr_info("Hard watchdog permanently disabled\n"); + return; + } + + allow_lockup_detector_init_retry = false; + + watchdog_hardlockup_available = true; + lockup_detector_setup(); +} + +/* + * lockup_detector_retry_init - retry init lockup detector if possible. + * + * Retry hardlockup detector init. It is useful when it requires some + * functionality that has to be initialized later on a particular + * platform. + */ +void __init lockup_detector_retry_init(void) +{ + /* Must be called before late init calls */ + if (!allow_lockup_detector_init_retry) + return; + + schedule_work(&detector_work); +} + +/* + * Ensure that optional delayed hardlockup init is proceed before + * the init code and memory is freed. + */ +static int __init lockup_detector_check(void) +{ + /* Prevent any later retry. */ + allow_lockup_detector_init_retry = false; + + /* Make sure no work is pending. */ + flush_work(&detector_work); + + watchdog_sysctl_init(); + + return 0; + +} +late_initcall_sync(lockup_detector_check); + void __init lockup_detector_init(void) { if (tick_nohz_full_enabled()) @@ -861,8 +1017,10 @@ void __init lockup_detector_init(void) cpumask_copy(&watchdog_cpumask, housekeeping_cpumask(HK_TYPE_TIMER)); - if (!watchdog_nmi_probe()) - nmi_watchdog_available = true; + if (!watchdog_hardlockup_probe()) + watchdog_hardlockup_available = true; + else + allow_lockup_detector_init_retry = true; + lockup_detector_setup(); - watchdog_sysctl_init(); } diff --git a/kernel/watchdog_buddy.c b/kernel/watchdog_buddy.c new file mode 100644 index 000000000000..34dbfe091f4b --- /dev/null +++ b/kernel/watchdog_buddy.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/cpu.h> +#include <linux/cpumask.h> +#include <linux/kernel.h> +#include <linux/nmi.h> +#include <linux/percpu-defs.h> + +static cpumask_t __read_mostly watchdog_cpus; + +static unsigned int watchdog_next_cpu(unsigned int cpu) +{ + unsigned int next_cpu; + + next_cpu = cpumask_next(cpu, &watchdog_cpus); + if (next_cpu >= nr_cpu_ids) + next_cpu = cpumask_first(&watchdog_cpus); + + if (next_cpu == cpu) + return nr_cpu_ids; + + return next_cpu; +} + +int __init watchdog_hardlockup_probe(void) +{ + return 0; +} + +void watchdog_hardlockup_enable(unsigned int cpu) +{ + unsigned int next_cpu; + + /* + * The new CPU will be marked online before the hrtimer interrupt + * gets a chance to run on it. If another CPU tests for a + * hardlockup on the new CPU before it has run its the hrtimer + * interrupt, it will get a false positive. Touch the watchdog on + * the new CPU to delay the check for at least 3 sampling periods + * to guarantee one hrtimer has run on the new CPU. + */ + watchdog_hardlockup_touch_cpu(cpu); + + /* + * We are going to check the next CPU. Our watchdog_hrtimer + * need not be zero if the CPU has already been online earlier. + * Touch the watchdog on the next CPU to avoid false positive + * if we try to check it in less then 3 interrupts. + */ + next_cpu = watchdog_next_cpu(cpu); + if (next_cpu < nr_cpu_ids) + watchdog_hardlockup_touch_cpu(next_cpu); + + /* + * Makes sure that watchdog is touched on this CPU before + * other CPUs could see it in watchdog_cpus. The counter + * part is in watchdog_buddy_check_hardlockup(). + */ + smp_wmb(); + + cpumask_set_cpu(cpu, &watchdog_cpus); +} + +void watchdog_hardlockup_disable(unsigned int cpu) +{ + unsigned int next_cpu = watchdog_next_cpu(cpu); + + /* + * Offlining this CPU will cause the CPU before this one to start + * checking the one after this one. If this CPU just finished checking + * the next CPU and updating hrtimer_interrupts_saved, and then the + * previous CPU checks it within one sample period, it will trigger a + * false positive. Touch the watchdog on the next CPU to prevent it. + */ + if (next_cpu < nr_cpu_ids) + watchdog_hardlockup_touch_cpu(next_cpu); + + /* + * Makes sure that watchdog is touched on the next CPU before + * this CPU disappear in watchdog_cpus. The counter part is in + * watchdog_buddy_check_hardlockup(). + */ + smp_wmb(); + + cpumask_clear_cpu(cpu, &watchdog_cpus); +} + +void watchdog_buddy_check_hardlockup(int hrtimer_interrupts) +{ + unsigned int next_cpu; + + /* + * Test for hardlockups every 3 samples. The sample period is + * watchdog_thresh * 2 / 5, so 3 samples gets us back to slightly over + * watchdog_thresh (over by 20%). + */ + if (hrtimer_interrupts % 3 != 0) + return; + + /* check for a hardlockup on the next CPU */ + next_cpu = watchdog_next_cpu(smp_processor_id()); + if (next_cpu >= nr_cpu_ids) + return; + + /* + * Make sure that the watchdog was touched on next CPU when + * watchdog_next_cpu() returned another one because of + * a change in watchdog_hardlockup_enable()/disable(). + */ + smp_rmb(); + + watchdog_hardlockup_check(next_cpu, NULL); +} diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_perf.c index 247bf0b1582c..8ea00c4a24b2 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_perf.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Detect hard lockups on a system + * Detect hard lockups on a system using perf * * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc. * @@ -20,28 +20,12 @@ #include <asm/irq_regs.h> #include <linux/perf_event.h> -static DEFINE_PER_CPU(bool, hard_watchdog_warn); -static DEFINE_PER_CPU(bool, watchdog_nmi_touch); static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); static DEFINE_PER_CPU(struct perf_event *, dead_event); static struct cpumask dead_events_mask; -static unsigned long hardlockup_allcpu_dumped; static atomic_t watchdog_cpus = ATOMIC_INIT(0); -notrace void arch_touch_nmi_watchdog(void) -{ - /* - * Using __raw here because some code paths have - * preemption enabled. If preemption is enabled - * then interrupts should be enabled too, in which - * case we shouldn't have to worry about the watchdog - * going off. - */ - raw_cpu_write(watchdog_nmi_touch, true); -} -EXPORT_SYMBOL(arch_touch_nmi_watchdog); - #ifdef CONFIG_HARDLOCKUP_CHECK_TIMESTAMP static DEFINE_PER_CPU(ktime_t, last_timestamp); static DEFINE_PER_CPU(unsigned int, nmi_rearmed); @@ -114,61 +98,24 @@ static void watchdog_overflow_callback(struct perf_event *event, /* Ensure the watchdog never gets throttled */ event->hw.interrupts = 0; - if (__this_cpu_read(watchdog_nmi_touch) == true) { - __this_cpu_write(watchdog_nmi_touch, false); - return; - } - if (!watchdog_check_timestamp()) return; - /* check for a hardlockup - * This is done by making sure our timer interrupt - * is incrementing. The timer interrupt should have - * fired multiple times before we overflow'd. If it hasn't - * then this is a good indication the cpu is stuck - */ - if (is_hardlockup()) { - int this_cpu = smp_processor_id(); - - /* only print hardlockups once */ - if (__this_cpu_read(hard_watchdog_warn) == true) - return; - - pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n", - this_cpu); - print_modules(); - print_irqtrace_events(current); - if (regs) - show_regs(regs); - else - dump_stack(); - - /* - * Perform all-CPU dump only once to avoid multiple hardlockups - * generating interleaving traces - */ - if (sysctl_hardlockup_all_cpu_backtrace && - !test_and_set_bit(0, &hardlockup_allcpu_dumped)) - trigger_allbutself_cpu_backtrace(); - - if (hardlockup_panic) - nmi_panic(regs, "Hard LOCKUP"); - - __this_cpu_write(hard_watchdog_warn, true); - return; - } - - __this_cpu_write(hard_watchdog_warn, false); - return; + watchdog_hardlockup_check(smp_processor_id(), regs); } static int hardlockup_detector_event_create(void) { - unsigned int cpu = smp_processor_id(); + unsigned int cpu; struct perf_event_attr *wd_attr; struct perf_event *evt; + /* + * Preemption is not disabled because memory will be allocated. + * Ensure CPU-locality by calling this in per-CPU kthread. + */ + WARN_ON(!is_percpu_thread()); + cpu = raw_smp_processor_id(); wd_attr = &wd_hw_attr; wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh); @@ -185,10 +132,14 @@ static int hardlockup_detector_event_create(void) } /** - * hardlockup_detector_perf_enable - Enable the local event + * watchdog_hardlockup_enable - Enable the local event + * + * @cpu: The CPU to enable hard lockup on. */ -void hardlockup_detector_perf_enable(void) +void watchdog_hardlockup_enable(unsigned int cpu) { + WARN_ON_ONCE(cpu != smp_processor_id()); + if (hardlockup_detector_event_create()) return; @@ -200,12 +151,16 @@ void hardlockup_detector_perf_enable(void) } /** - * hardlockup_detector_perf_disable - Disable the local event + * watchdog_hardlockup_disable - Disable the local event + * + * @cpu: The CPU to enable hard lockup on. */ -void hardlockup_detector_perf_disable(void) +void watchdog_hardlockup_disable(unsigned int cpu) { struct perf_event *event = this_cpu_read(watchdog_ev); + WARN_ON_ONCE(cpu != smp_processor_id()); + if (event) { perf_event_disable(event); this_cpu_write(watchdog_ev, NULL); @@ -268,7 +223,7 @@ void __init hardlockup_detector_perf_restart(void) lockdep_assert_cpus_held(); - if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + if (!(watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED)) return; for_each_online_cpu(cpu) { @@ -279,12 +234,22 @@ void __init hardlockup_detector_perf_restart(void) } } +bool __weak __init arch_perf_nmi_is_available(void) +{ + return true; +} + /** - * hardlockup_detector_perf_init - Probe whether NMI event is available at all + * watchdog_hardlockup_probe - Probe whether NMI event is available at all */ -int __init hardlockup_detector_perf_init(void) +int __init watchdog_hardlockup_probe(void) { - int ret = hardlockup_detector_event_create(); + int ret; + + if (!arch_perf_nmi_is_available()) + return -ENODEV; + + ret = hardlockup_detector_event_create(); if (ret) { pr_info("Perf NMI watchdog permanently disabled\n"); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 376784ad3545..781f061ec0fa 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1035,27 +1035,30 @@ config BOOTPARAM_SOFTLOCKUP_PANIC Say N if unsure. -config HARDLOCKUP_DETECTOR_PERF +config HAVE_HARDLOCKUP_DETECTOR_BUDDY bool - select SOFTLOCKUP_DETECTOR + depends on SMP + default y # -# Enables a timestamp based low pass filter to compensate for perf based -# hard lockup detection which runs too fast due to turbo modes. +# Global switch whether to build a hardlockup detector at all. It is available +# only when the architecture supports at least one implementation. There are +# two exceptions. The hardlockup detector is never enabled on: # -config HARDLOCKUP_CHECK_TIMESTAMP - bool - +# s390: it reported many false positives there # -# arch/ can define HAVE_HARDLOCKUP_DETECTOR_ARCH to provide their own hard -# lockup detector rather than the perf based detector. +# sparc64: has a custom implementation which is not using the common +# hardlockup command line options and sysctl interface. # config HARDLOCKUP_DETECTOR bool "Detect Hard Lockups" - depends on DEBUG_KERNEL && !S390 - depends on HAVE_HARDLOCKUP_DETECTOR_PERF || HAVE_HARDLOCKUP_DETECTOR_ARCH + depends on DEBUG_KERNEL && !S390 && !HARDLOCKUP_DETECTOR_SPARC64 + depends on HAVE_HARDLOCKUP_DETECTOR_PERF || HAVE_HARDLOCKUP_DETECTOR_BUDDY || HAVE_HARDLOCKUP_DETECTOR_ARCH + imply HARDLOCKUP_DETECTOR_PERF + imply HARDLOCKUP_DETECTOR_BUDDY + imply HARDLOCKUP_DETECTOR_ARCH select LOCKUP_DETECTOR - select HARDLOCKUP_DETECTOR_PERF if HAVE_HARDLOCKUP_DETECTOR_PERF + help Say Y here to enable the kernel to act as a watchdog to detect hard lockups. @@ -1065,6 +1068,63 @@ config HARDLOCKUP_DETECTOR chance to run. The current stack trace is displayed upon detection and the system will stay locked up. +# +# Note that arch-specific variants are always preferred. +# +config HARDLOCKUP_DETECTOR_PREFER_BUDDY + bool "Prefer the buddy CPU hardlockup detector" + depends on HARDLOCKUP_DETECTOR + depends on HAVE_HARDLOCKUP_DETECTOR_PERF && HAVE_HARDLOCKUP_DETECTOR_BUDDY + depends on !HAVE_HARDLOCKUP_DETECTOR_ARCH + help + Say Y here to prefer the buddy hardlockup detector over the perf one. + + With the buddy detector, each CPU uses its softlockup hrtimer + to check that the next CPU is processing hrtimer interrupts by + verifying that a counter is increasing. + + This hardlockup detector is useful on systems that don't have + an arch-specific hardlockup detector or if resources needed + for the hardlockup detector are better used for other things. + +config HARDLOCKUP_DETECTOR_PERF + bool + depends on HARDLOCKUP_DETECTOR + depends on HAVE_HARDLOCKUP_DETECTOR_PERF && !HARDLOCKUP_DETECTOR_PREFER_BUDDY + depends on !HAVE_HARDLOCKUP_DETECTOR_ARCH + select HARDLOCKUP_DETECTOR_COUNTS_HRTIMER + +config HARDLOCKUP_DETECTOR_BUDDY + bool + depends on HARDLOCKUP_DETECTOR + depends on HAVE_HARDLOCKUP_DETECTOR_BUDDY + depends on !HAVE_HARDLOCKUP_DETECTOR_PERF || HARDLOCKUP_DETECTOR_PREFER_BUDDY + depends on !HAVE_HARDLOCKUP_DETECTOR_ARCH + select HARDLOCKUP_DETECTOR_COUNTS_HRTIMER + +config HARDLOCKUP_DETECTOR_ARCH + bool + depends on HARDLOCKUP_DETECTOR + depends on HAVE_HARDLOCKUP_DETECTOR_ARCH + help + The arch-specific implementation of the hardlockup detector will + be used. + +# +# Both the "perf" and "buddy" hardlockup detectors count hrtimer +# interrupts. This config enables functions managing this common code. +# +config HARDLOCKUP_DETECTOR_COUNTS_HRTIMER + bool + select SOFTLOCKUP_DETECTOR + +# +# Enables a timestamp based low pass filter to compensate for perf based +# hard lockup detection which runs too fast due to turbo modes. +# +config HARDLOCKUP_CHECK_TIMESTAMP + bool + config BOOTPARAM_HARDLOCKUP_PANIC bool "Panic (Reboot) On Hard Lockups" depends on HARDLOCKUP_DETECTOR diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c index 6130c42b8e59..e19199f4a684 100644 --- a/lib/decompress_inflate.c +++ b/lib/decompress_inflate.c @@ -39,7 +39,7 @@ static long INIT nofill(void *buffer, unsigned long len) } /* Included from initramfs et al code */ -STATIC int INIT __gunzip(unsigned char *buf, long len, +static int INIT __gunzip(unsigned char *buf, long len, long (*fill)(void*, unsigned long), long (*flush)(void*, unsigned long), unsigned char *out_buf, long out_len, diff --git a/lib/decompress_unxz.c b/lib/decompress_unxz.c index 9f4262ee33a5..353268b9f129 100644 --- a/lib/decompress_unxz.c +++ b/lib/decompress_unxz.c @@ -102,6 +102,8 @@ */ #ifdef STATIC # define XZ_PREBOOT +#else +#include <linux/decompress/unxz.h> #endif #ifdef __KERNEL__ # include <linux/decompress/mm.h> diff --git a/lib/decompress_unzstd.c b/lib/decompress_unzstd.c index a512b99ae16a..bba2c0bb10cb 100644 --- a/lib/decompress_unzstd.c +++ b/lib/decompress_unzstd.c @@ -69,6 +69,8 @@ # define UNZSTD_PREBOOT # include "xxhash.c" # include "zstd/decompress_sources.h" +#else +#include <linux/decompress/unzstd.h> #endif #include <linux/decompress/mm.h> diff --git a/lib/devmem_is_allowed.c b/lib/devmem_is_allowed.c index 60be9e24bd57..9c060c69f134 100644 --- a/lib/devmem_is_allowed.c +++ b/lib/devmem_is_allowed.c @@ -10,6 +10,7 @@ #include <linux/mm.h> #include <linux/ioport.h> +#include <linux/io.h> /* * devmem_is_allowed() checks to see if /dev/mem access to a certain address diff --git a/lib/devres.c b/lib/devres.c index 6baf43902ead..c44f104b58d5 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -129,7 +129,7 @@ __devm_ioremap_resource(struct device *dev, const struct resource *res, BUG_ON(!dev); if (!res || resource_type(res) != IORESOURCE_MEM) { - dev_err(dev, "invalid resource\n"); + dev_err(dev, "invalid resource %pR\n", res); return IOMEM_ERR_PTR(-EINVAL); } diff --git a/lib/kunit/debugfs.c b/lib/kunit/debugfs.c index b08bb1fba106..22c5c496a68f 100644 --- a/lib/kunit/debugfs.c +++ b/lib/kunit/debugfs.c @@ -10,6 +10,7 @@ #include <kunit/test.h> #include "string-stream.h" +#include "debugfs.h" #define KUNIT_DEBUGFS_ROOT "kunit" #define KUNIT_DEBUGFS_RESULTS "results" diff --git a/lib/zstd/common/zstd_deps.h b/lib/zstd/common/zstd_deps.h index f06df065dec0..2c34e8a33a1c 100644 --- a/lib/zstd/common/zstd_deps.h +++ b/lib/zstd/common/zstd_deps.h @@ -105,21 +105,3 @@ static uint64_t ZSTD_div64(uint64_t dividend, uint32_t divisor) { #endif /* ZSTD_DEPS_IO */ #endif /* ZSTD_DEPS_NEED_IO */ - -/* - * Only requested when MSAN is enabled. - * Need: - * intptr_t - */ -#ifdef ZSTD_DEPS_NEED_STDINT -#ifndef ZSTD_DEPS_STDINT -#define ZSTD_DEPS_STDINT - -/* - * The Linux Kernel doesn't provide intptr_t, only uintptr_t, which - * is an unsigned long. - */ -typedef long intptr_t; - -#endif /* ZSTD_DEPS_STDINT */ -#endif /* ZSTD_DEPS_NEED_STDINT */ diff --git a/mm/sparse.c b/mm/sparse.c index 7a29e10193fe..297a8b772e8d 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -701,7 +701,7 @@ static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages) return rc; } #else -struct page * __meminit populate_section_memmap(unsigned long pfn, +static struct page * __meminit populate_section_memmap(unsigned long pfn, unsigned long nr_pages, int nid, struct vmem_altmap *altmap, struct dev_pagemap *pgmap) { diff --git a/scripts/spelling.txt b/scripts/spelling.txt index f8bd6178d17b..fc7ba95e86a0 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -155,6 +155,7 @@ aquired||acquired aquisition||acquisition arbitary||arbitrary architechture||architecture +archtecture||architecture arguement||argument arguements||arguments arithmatic||arithmetic @@ -279,6 +280,7 @@ cant'||can't canot||cannot cann't||can't cannnot||cannot +capabiity||capability capabilites||capabilities capabilties||capabilities capabilty||capability @@ -426,6 +428,7 @@ cotrol||control cound||could couter||counter coutner||counter +creationg||creating cryptocraphic||cryptographic cummulative||cumulative cunter||counter @@ -492,6 +495,7 @@ destorys||destroys destroied||destroyed detabase||database deteced||detected +detecion||detection detectt||detect detroyed||destroyed develope||develop @@ -513,6 +517,7 @@ diferent||different differrence||difference diffrent||different differenciate||differentiate +diffreential||differential diffrentiate||differentiate difinition||definition digial||digital @@ -617,6 +622,7 @@ evalute||evaluate evalutes||evaluates evalution||evaluation excecutable||executable +excceed||exceed exceded||exceeded exceds||exceeds exceeed||exceed @@ -632,6 +638,7 @@ existant||existent exixt||exist exsits||exists exlcude||exclude +exlcuding||excluding exlcusive||exclusive exlusive||exclusive exmaple||example @@ -726,6 +733,8 @@ generiously||generously genereate||generate genereted||generated genric||generic +gerenal||general +geting||getting globel||global grabing||grabbing grahical||graphical @@ -899,6 +908,7 @@ iteraions||iterations iternations||iterations itertation||iteration itslef||itself +ivalid||invalid jave||java jeffies||jiffies jumpimng||jumping @@ -977,6 +987,7 @@ microprocesspr||microprocessor migrateable||migratable millenium||millennium milliseonds||milliseconds +minimim||minimum minium||minimum minimam||minimum minimun||minimum @@ -1042,6 +1053,7 @@ notifed||notified notity||notify nubmer||number numebr||number +numer||number numner||number nunber||number obtaion||obtain @@ -1061,6 +1073,7 @@ offet||offset offlaod||offload offloded||offloaded offseting||offsetting +oflload||offload omited||omitted omiting||omitting omitt||omit @@ -1105,6 +1118,7 @@ pakage||package paket||packet pallette||palette paln||plan +palne||plane paramameters||parameters paramaters||parameters paramater||parameter @@ -1181,12 +1195,14 @@ previsously||previously primative||primitive princliple||principle priorty||priority +priting||printing privilaged||privileged privilage||privilege priviledge||privilege priviledges||privileges privleges||privileges probaly||probably +probabalistic||probabilistic procceed||proceed proccesors||processors procesed||processed @@ -1460,6 +1476,7 @@ submited||submitted submition||submission succeded||succeeded suceed||succeed +succesfuly||successfully succesfully||successfully succesful||successful successed||succeeded @@ -1503,6 +1520,7 @@ symetric||symmetric synax||syntax synchonized||synchronized sychronization||synchronization +sychronously||synchronously synchronuously||synchronously syncronize||synchronize syncronized||synchronized @@ -1532,6 +1550,7 @@ threee||three threshhold||threshold thresold||threshold throught||through +tansition||transition trackling||tracking troughput||throughput trys||tries @@ -1611,6 +1630,7 @@ unneccessary||unnecessary unnecesary||unnecessary unneedingly||unnecessarily unnsupported||unsupported +unuspported||unsupported unmached||unmatched unprecise||imprecise unpriviledged||unprivileged @@ -1657,6 +1677,7 @@ verfication||verification veriosn||version verisons||versions verison||version +veritical||vertical verson||version vicefersa||vice-versa virtal||virtual @@ -1677,6 +1698,7 @@ whenver||whenever wheter||whether whe||when wierd||weird +wihout||without wiil||will wirte||write withing||within |