diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-08-29 23:53:51 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-08-29 23:53:51 +0200 |
commit | d68b4b6f307d155475cce541f2aee938032ed22e (patch) | |
tree | c2a6487ac8b1bce963b5b352b42e461a6fa8da15 /arch/x86 | |
parent | Merge tag 'mm-stable-2023-08-28-18-26' of git://git.kernel.org/pub/scm/linux/... (diff) | |
parent | document while_each_thread(), change first_tid() to use for_each_thread() (diff) | |
download | linux-d68b4b6f307d155475cce541f2aee938032ed22e.tar.xz linux-d68b4b6f307d155475cce541f2aee938032ed22e.zip |
Merge tag 'mm-nonmm-stable-2023-08-28-22-48' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull non-MM updates from Andrew Morton:
- An extensive rework of kexec and crash Kconfig from Eric DeVolder
("refactor Kconfig to consolidate KEXEC and CRASH options")
- kernel.h slimming work from Andy Shevchenko ("kernel.h: Split out a
couple of macros to args.h")
- gdb feature work from Kuan-Ying Lee ("Add GDB memory helper
commands")
- vsprintf inclusion rationalization from Andy Shevchenko
("lib/vsprintf: Rework header inclusions")
- Switch the handling of kdump from a udev scheme to in-kernel
handling, by Eric DeVolder ("crash: Kernel handling of CPU and memory
hot un/plug")
- Many singleton patches to various parts of the tree
* tag 'mm-nonmm-stable-2023-08-28-22-48' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (81 commits)
document while_each_thread(), change first_tid() to use for_each_thread()
drivers/char/mem.c: shrink character device's devlist[] array
x86/crash: optimize CPU changes
crash: change crash_prepare_elf64_headers() to for_each_possible_cpu()
crash: hotplug support for kexec_load()
x86/crash: add x86 crash hotplug support
crash: memory and CPU hotplug sysfs attributes
kexec: exclude elfcorehdr from the segment digest
crash: add generic infrastructure for crash hotplug support
crash: move a few code bits to setup support of crash hotplug
kstrtox: consistently use _tolower()
kill do_each_thread()
nilfs2: fix WARNING in mark_buffer_dirty due to discarded buffer reuse
scripts/bloat-o-meter: count weak symbol sizes
treewide: drop CONFIG_EMBEDDED
lockdep: fix static memory detection even more
lib/vsprintf: declare no_hash_pointers in sprintf.h
lib/vsprintf: split out sprintf() and friends
kernel/fork: stop playing lockless games for exe_file replacement
adfs: delete unused "union adfs_dirtail" definition
...
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/Kconfig | 95 | ||||
-rw-r--r-- | arch/x86/include/asm/irq.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/kexec.h | 18 | ||||
-rw-r--r-- | arch/x86/include/asm/rmwcc.h | 11 | ||||
-rw-r--r-- | arch/x86/include/asm/sections.h | 18 | ||||
-rw-r--r-- | arch/x86/kernel/Makefile | 9 | ||||
-rw-r--r-- | arch/x86/kernel/apic/hw_nmi.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/crash.c | 142 | ||||
-rw-r--r-- | arch/x86/pci/amd_bus.c | 8 | ||||
-rw-r--r-- | arch/x86/pci/bus_numa.c | 2 |
10 files changed, 194 insertions, 115 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ad3a53e28aac..bd9a1804cf72 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2006,88 +2006,37 @@ config EFI_RUNTIME_MAP source "kernel/Kconfig.hz" -config KEXEC - bool "kexec system call" - select KEXEC_CORE - help - kexec is a system call that implements the ability to shutdown your - current kernel, and to start another kernel. It is like a reboot - but it is independent of the system firmware. And like a reboot - you can start any kernel with it, not just Linux. - - The name comes from the similarity to the exec system call. - - It is an ongoing process to be certain the hardware in a machine - is properly shutdown, so do not be surprised if this code does not - initially work for you. As of this writing the exact hardware - interface is strongly in flux, so no good recommendation can be - made. - -config KEXEC_FILE - bool "kexec file based system call" - select KEXEC_CORE - select HAVE_IMA_KEXEC if IMA - depends on X86_64 - depends on CRYPTO=y - depends on CRYPTO_SHA256=y - help - This is new version of kexec system call. This system call is - file based and takes file descriptors as system call argument - for kernel and initramfs as opposed to list of segments as - accepted by previous system call. +config ARCH_SUPPORTS_KEXEC + def_bool y -config ARCH_HAS_KEXEC_PURGATORY - def_bool KEXEC_FILE +config ARCH_SUPPORTS_KEXEC_FILE + def_bool X86_64 && CRYPTO && CRYPTO_SHA256 -config KEXEC_SIG - bool "Verify kernel signature during kexec_file_load() syscall" +config ARCH_SELECTS_KEXEC_FILE + def_bool y depends on KEXEC_FILE - help + select HAVE_IMA_KEXEC if IMA - This option makes the kexec_file_load() syscall check for a valid - signature of the kernel image. The image can still be loaded without - a valid signature unless you also enable KEXEC_SIG_FORCE, though if - there's a signature that we can check, then it must be valid. +config ARCH_SUPPORTS_KEXEC_PURGATORY + def_bool KEXEC_FILE - In addition to this option, you need to enable signature - verification for the corresponding kernel image type being - loaded in order for this to work. +config ARCH_SUPPORTS_KEXEC_SIG + def_bool y -config KEXEC_SIG_FORCE - bool "Require a valid signature in kexec_file_load() syscall" - depends on KEXEC_SIG - help - This option makes kernel signature verification mandatory for - the kexec_file_load() syscall. +config ARCH_SUPPORTS_KEXEC_SIG_FORCE + def_bool y -config KEXEC_BZIMAGE_VERIFY_SIG - bool "Enable bzImage signature verification support" - depends on KEXEC_SIG - depends on SIGNED_PE_FILE_VERIFICATION - select SYSTEM_TRUSTED_KEYRING - help - Enable bzImage signature verification support. +config ARCH_SUPPORTS_KEXEC_BZIMAGE_VERIFY_SIG + def_bool y -config CRASH_DUMP - bool "kernel crash dumps" - depends on X86_64 || (X86_32 && HIGHMEM) - help - Generate crash dump after being started by kexec. - This should be normally only set in special crash dump kernels - which are loaded in the main kernel with kexec-tools into - a specially reserved region and then later executed after - a crash by kdump/kexec. The crash dump kernel must be compiled - to a memory address not used by the main kernel or BIOS using - PHYSICAL_START, or it must be built as a relocatable image - (CONFIG_RELOCATABLE=y). - For more details see Documentation/admin-guide/kdump/kdump.rst +config ARCH_SUPPORTS_KEXEC_JUMP + def_bool y -config KEXEC_JUMP - bool "kexec jump" - depends on KEXEC && HIBERNATION - help - Jump between original kernel and kexeced kernel and invoke - code in physical address mode via KEXEC +config ARCH_SUPPORTS_CRASH_DUMP + def_bool X86_64 || (X86_32 && HIGHMEM) + +config ARCH_SUPPORTS_CRASH_HOTPLUG + def_bool y config PHYSICAL_START hex "Physical address where the kernel is loaded" if (EXPERT || CRASH_DUMP) diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 29e083b92813..836c170d3087 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -42,7 +42,7 @@ extern void init_ISA_irqs(void); #ifdef CONFIG_X86_LOCAL_APIC void arch_trigger_cpumask_backtrace(const struct cpumask *mask, - bool exclude_self); + int exclude_cpu); #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace #endif diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 5b77bbc28f96..3be6a98751f0 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -209,6 +209,24 @@ typedef void crash_vmclear_fn(void); extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss; extern void kdump_nmi_shootdown_cpus(void); +#ifdef CONFIG_CRASH_HOTPLUG +void arch_crash_handle_hotplug_event(struct kimage *image); +#define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event + +#ifdef CONFIG_HOTPLUG_CPU +int arch_crash_hotplug_cpu_support(void); +#define crash_hotplug_cpu_support arch_crash_hotplug_cpu_support +#endif + +#ifdef CONFIG_MEMORY_HOTPLUG +int arch_crash_hotplug_memory_support(void); +#define crash_hotplug_memory_support arch_crash_hotplug_memory_support +#endif + +unsigned int arch_crash_get_elfcorehdr_size(void); +#define crash_get_elfcorehdr_size arch_crash_get_elfcorehdr_size +#endif + #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_KEXEC_H */ diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index 7fa611216417..4b081e0d3306 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -2,12 +2,7 @@ #ifndef _ASM_X86_RMWcc #define _ASM_X86_RMWcc -/* This counts to 12. Any more, it will return 13th argument. */ -#define __RMWcc_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n -#define RMWcc_ARGS(X...) __RMWcc_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) - -#define __RMWcc_CONCAT(a, b) a ## b -#define RMWcc_CONCAT(a, b) __RMWcc_CONCAT(a, b) +#include <linux/args.h> #define __CLOBBERS_MEM(clb...) "memory", ## clb @@ -48,7 +43,7 @@ cc_label: c = true; \ #define GEN_UNARY_RMWcc_3(op, var, cc) \ GEN_UNARY_RMWcc_4(op, var, cc, "%[var]") -#define GEN_UNARY_RMWcc(X...) RMWcc_CONCAT(GEN_UNARY_RMWcc_, RMWcc_ARGS(X))(X) +#define GEN_UNARY_RMWcc(X...) CONCATENATE(GEN_UNARY_RMWcc_, COUNT_ARGS(X))(X) #define GEN_BINARY_RMWcc_6(op, var, cc, vcon, _val, arg0) \ __GEN_RMWcc(op " %[val], " arg0, var, cc, \ @@ -57,7 +52,7 @@ cc_label: c = true; \ #define GEN_BINARY_RMWcc_5(op, var, cc, vcon, val) \ GEN_BINARY_RMWcc_6(op, var, cc, vcon, val, "%[var]") -#define GEN_BINARY_RMWcc(X...) RMWcc_CONCAT(GEN_BINARY_RMWcc_, RMWcc_ARGS(X))(X) +#define GEN_BINARY_RMWcc(X...) CONCATENATE(GEN_BINARY_RMWcc_, COUNT_ARGS(X))(X) #define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, cc, clobbers...) \ __GEN_RMWcc(op " %[var]\n\t" suffix, var, cc, \ diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index a6e8373a5170..3fa87e5e11ab 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h @@ -2,8 +2,6 @@ #ifndef _ASM_X86_SECTIONS_H #define _ASM_X86_SECTIONS_H -#define arch_is_kernel_initmem_freed arch_is_kernel_initmem_freed - #include <asm-generic/sections.h> #include <asm/extable.h> @@ -18,20 +16,4 @@ extern char __end_of_kernel_reserve[]; extern unsigned long _brk_start, _brk_end; -static inline bool arch_is_kernel_initmem_freed(unsigned long addr) -{ - /* - * If _brk_start has not been cleared, brk allocation is incomplete, - * and we can not make assumptions about its use. - */ - if (_brk_start) - return 0; - - /* - * After brk allocation is complete, space between _brk_end and _end - * is available for allocation. - */ - return addr >= _brk_end && addr < (unsigned long)&_end; -} - #endif /* _ASM_X86_SECTIONS_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 4070a01c11b7..00df34c263cc 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -33,11 +33,10 @@ KCSAN_SANITIZE := n KMSAN_SANITIZE_head$(BITS).o := n KMSAN_SANITIZE_nmi.o := n -# If instrumentation of this dir is enabled, boot hangs during first second. -# Probably could be more selective here, but note that files related to irqs, -# boot, dumpstack/stacktrace, etc are either non-interesting or can lead to -# non-deterministic coverage. -KCOV_INSTRUMENT := n +# If instrumentation of the following files is enabled, boot hangs during +# first second. +KCOV_INSTRUMENT_head$(BITS).o := n +KCOV_INSTRUMENT_sev.o := n CFLAGS_irq.o := -I $(srctree)/$(src)/../include/asm/trace diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 34a992e275ef..d6e01f924299 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -34,9 +34,9 @@ static void nmi_raise_cpu_backtrace(cpumask_t *mask) apic->send_IPI_mask(mask, NMI_VECTOR); } -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu) { - nmi_trigger_cpumask_backtrace(mask, exclude_self, + nmi_trigger_cpumask_backtrace(mask, exclude_cpu, nmi_raise_cpu_backtrace); } diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index cdd92ab43cda..587c7743fd21 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -158,8 +158,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs) crash_save_cpu(regs, safe_smp_processor_id()); } -#ifdef CONFIG_KEXEC_FILE - +#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_HOTPLUG) static int get_nr_ram_ranges_callback(struct resource *res, void *arg) { unsigned int *nr_ranges = arg; @@ -231,7 +230,7 @@ static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) /* Prepare elf headers. Return addr and size */ static int prepare_elf_headers(struct kimage *image, void **addr, - unsigned long *sz) + unsigned long *sz, unsigned long *nr_mem_ranges) { struct crash_mem *cmem; int ret; @@ -249,6 +248,9 @@ static int prepare_elf_headers(struct kimage *image, void **addr, if (ret) goto out; + /* Return the computed number of memory ranges, for hotplug usage */ + *nr_mem_ranges = cmem->nr_ranges; + /* By default prepare 64bit headers */ ret = crash_prepare_elf64_headers(cmem, IS_ENABLED(CONFIG_X86_64), addr, sz); @@ -256,7 +258,9 @@ out: vfree(cmem); return ret; } +#endif +#ifdef CONFIG_KEXEC_FILE static int add_e820_entry(struct boot_params *params, struct e820_entry *entry) { unsigned int nr_e820_entries; @@ -371,18 +375,42 @@ out: int crash_load_segments(struct kimage *image) { int ret; + unsigned long pnum = 0; struct kexec_buf kbuf = { .image = image, .buf_min = 0, .buf_max = ULONG_MAX, .top_down = false }; /* Prepare elf headers and add a segment */ - ret = prepare_elf_headers(image, &kbuf.buffer, &kbuf.bufsz); + ret = prepare_elf_headers(image, &kbuf.buffer, &kbuf.bufsz, &pnum); if (ret) return ret; - image->elf_headers = kbuf.buffer; - image->elf_headers_sz = kbuf.bufsz; + image->elf_headers = kbuf.buffer; + image->elf_headers_sz = kbuf.bufsz; + kbuf.memsz = kbuf.bufsz; + +#ifdef CONFIG_CRASH_HOTPLUG + /* + * The elfcorehdr segment size accounts for VMCOREINFO, kernel_map, + * maximum CPUs and maximum memory ranges. + */ + if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) + pnum = 2 + CONFIG_NR_CPUS_DEFAULT + CONFIG_CRASH_MAX_MEMORY_RANGES; + else + pnum += 2 + CONFIG_NR_CPUS_DEFAULT; + + if (pnum < (unsigned long)PN_XNUM) { + kbuf.memsz = pnum * sizeof(Elf64_Phdr); + kbuf.memsz += sizeof(Elf64_Ehdr); + + image->elfcorehdr_index = image->nr_segments; + + /* Mark as usable to crash kernel, else crash kernel fails on boot */ + image->elf_headers_sz = kbuf.memsz; + } else { + pr_err("number of Phdrs %lu exceeds max\n", pnum); + } +#endif - kbuf.memsz = kbuf.bufsz; kbuf.buf_align = ELF_CORE_HEADER_ALIGN; kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; ret = kexec_add_buffer(&kbuf); @@ -395,3 +423,103 @@ int crash_load_segments(struct kimage *image) return ret; } #endif /* CONFIG_KEXEC_FILE */ + +#ifdef CONFIG_CRASH_HOTPLUG + +#undef pr_fmt +#define pr_fmt(fmt) "crash hp: " fmt + +/* These functions provide the value for the sysfs crash_hotplug nodes */ +#ifdef CONFIG_HOTPLUG_CPU +int arch_crash_hotplug_cpu_support(void) +{ + return crash_check_update_elfcorehdr(); +} +#endif + +#ifdef CONFIG_MEMORY_HOTPLUG +int arch_crash_hotplug_memory_support(void) +{ + return crash_check_update_elfcorehdr(); +} +#endif + +unsigned int arch_crash_get_elfcorehdr_size(void) +{ + unsigned int sz; + + /* kernel_map, VMCOREINFO and maximum CPUs */ + sz = 2 + CONFIG_NR_CPUS_DEFAULT; + if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) + sz += CONFIG_CRASH_MAX_MEMORY_RANGES; + sz *= sizeof(Elf64_Phdr); + return sz; +} + +/** + * arch_crash_handle_hotplug_event() - Handle hotplug elfcorehdr changes + * @image: a pointer to kexec_crash_image + * + * Prepare the new elfcorehdr and replace the existing elfcorehdr. + */ +void arch_crash_handle_hotplug_event(struct kimage *image) +{ + void *elfbuf = NULL, *old_elfcorehdr; + unsigned long nr_mem_ranges; + unsigned long mem, memsz; + unsigned long elfsz = 0; + + /* + * As crash_prepare_elf64_headers() has already described all + * possible CPUs, there is no need to update the elfcorehdr + * for additional CPU changes. + */ + if ((image->file_mode || image->elfcorehdr_updated) && + ((image->hp_action == KEXEC_CRASH_HP_ADD_CPU) || + (image->hp_action == KEXEC_CRASH_HP_REMOVE_CPU))) + return; + + /* + * Create the new elfcorehdr reflecting the changes to CPU and/or + * memory resources. + */ + if (prepare_elf_headers(image, &elfbuf, &elfsz, &nr_mem_ranges)) { + pr_err("unable to create new elfcorehdr"); + goto out; + } + + /* + * Obtain address and size of the elfcorehdr segment, and + * check it against the new elfcorehdr buffer. + */ + mem = image->segment[image->elfcorehdr_index].mem; + memsz = image->segment[image->elfcorehdr_index].memsz; + if (elfsz > memsz) { + pr_err("update elfcorehdr elfsz %lu > memsz %lu", + elfsz, memsz); + goto out; + } + + /* + * Copy new elfcorehdr over the old elfcorehdr at destination. + */ + old_elfcorehdr = kmap_local_page(pfn_to_page(mem >> PAGE_SHIFT)); + if (!old_elfcorehdr) { + pr_err("mapping elfcorehdr segment failed\n"); + goto out; + } + + /* + * Temporarily invalidate the crash image while the + * elfcorehdr is updated. + */ + xchg(&kexec_crash_image, NULL); + memcpy_flushcache(old_elfcorehdr, elfbuf, elfsz); + xchg(&kexec_crash_image, image); + kunmap_local(old_elfcorehdr); + pr_debug("updated elfcorehdr\n"); + +out: + vfree(elfbuf); +} +#endif diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index dd40d3fea74e..631512f7ec85 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -51,6 +51,14 @@ static struct pci_root_info __init *find_pci_root_info(int node, int link) return NULL; } +static inline resource_size_t cap_resource(u64 val) +{ + if (val > RESOURCE_SIZE_MAX) + return RESOURCE_SIZE_MAX; + + return val; +} + /** * early_root_info_init() * called before pcibios_scan_root and pci_scan_bus diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c index 2752c02e3f0e..e4a525e59eaf 100644 --- a/arch/x86/pci/bus_numa.c +++ b/arch/x86/pci/bus_numa.c @@ -101,7 +101,7 @@ void update_res(struct pci_root_info *info, resource_size_t start, if (start > end) return; - if (start == MAX_RESOURCE) + if (start == RESOURCE_SIZE_MAX) return; if (!merge) |