diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-07-02 21:08:10 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-07-02 21:08:10 +0200 |
commit | 71bd9341011f626d692aabe024f099820f02c497 (patch) | |
tree | a1c27fd8f17daff36e380800c5b69769d0d9cc99 /include | |
parent | Merge branch 'for-5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/c... (diff) | |
parent | ipc/util.c: use binary search for max_idx (diff) | |
download | linux-71bd9341011f626d692aabe024f099820f02c497.tar.xz linux-71bd9341011f626d692aabe024f099820f02c497.zip |
Merge branch 'akpm' (patches from Andrew)
Merge more updates from Andrew Morton:
"190 patches.
Subsystems affected by this patch series: mm (hugetlb, userfaultfd,
vmscan, kconfig, proc, z3fold, zbud, ras, mempolicy, memblock,
migration, thp, nommu, kconfig, madvise, memory-hotplug, zswap,
zsmalloc, zram, cleanups, kfence, and hmm), procfs, sysctl, misc,
core-kernel, lib, lz4, checkpatch, init, kprobes, nilfs2, hfs,
signals, exec, kcov, selftests, compress/decompress, and ipc"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (190 commits)
ipc/util.c: use binary search for max_idx
ipc/sem.c: use READ_ONCE()/WRITE_ONCE() for use_global_lock
ipc: use kmalloc for msg_queue and shmid_kernel
ipc sem: use kvmalloc for sem_undo allocation
lib/decompressors: remove set but not used variabled 'level'
selftests/vm/pkeys: exercise x86 XSAVE init state
selftests/vm/pkeys: refill shadow register after implicit kernel write
selftests/vm/pkeys: handle negative sys_pkey_alloc() return code
selftests/vm/pkeys: fix alloc_random_pkey() to make it really, really random
kcov: add __no_sanitize_coverage to fix noinstr for all architectures
exec: remove checks in __register_bimfmt()
x86: signal: don't do sas_ss_reset() until we are certain that sigframe won't be abandoned
hfsplus: report create_date to kstat.btime
hfsplus: remove unnecessary oom message
nilfs2: remove redundant continue statement in a while-loop
kprobes: remove duplicated strong free_insn_page in x86 and s390
init: print out unknown kernel parameters
checkpatch: do not complain about positive return values starting with EPOLL
checkpatch: improve the indented label test
checkpatch: scripts/spdxcheck.py now requires python3
...
Diffstat (limited to 'include')
45 files changed, 744 insertions, 513 deletions
diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index bafc51f483c4..edb0e2a602a8 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -18,7 +18,8 @@ #endif #ifndef __ASSEMBLY__ -#include <linux/kernel.h> +#include <linux/panic.h> +#include <linux/printk.h> #ifdef CONFIG_BUG diff --git a/include/linux/ascii85.h b/include/linux/ascii85.h index 4cc40201273e..83ad775ad0aa 100644 --- a/include/linux/ascii85.h +++ b/include/linux/ascii85.h @@ -8,7 +8,8 @@ #ifndef _ASCII85_H_ #define _ASCII85_H_ -#include <linux/kernel.h> +#include <linux/math.h> +#include <linux/types.h> #define ASCII85_BUFSZ 6 diff --git a/include/linux/bootmem_info.h b/include/linux/bootmem_info.h new file mode 100644 index 000000000000..2bc8b1f69c93 --- /dev/null +++ b/include/linux/bootmem_info.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_BOOTMEM_INFO_H +#define __LINUX_BOOTMEM_INFO_H + +#include <linux/mm.h> + +/* + * Types for free bootmem stored in page->lru.next. These have to be in + * some random range in unsigned long space for debugging purposes. + */ +enum { + MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE = 12, + SECTION_INFO = MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE, + MIX_SECTION_INFO, + NODE_INFO, + MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE = NODE_INFO, +}; + +#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE +void __init register_page_bootmem_info_node(struct pglist_data *pgdat); + +void get_page_bootmem(unsigned long info, struct page *page, + unsigned long type); +void put_page_bootmem(struct page *page); + +/* + * Any memory allocated via the memblock allocator and not via the + * buddy will be marked reserved already in the memmap. For those + * pages, we can call this function to free it to buddy allocator. + */ +static inline void free_bootmem_page(struct page *page) +{ + unsigned long magic = (unsigned long)page->freelist; + + /* + * The reserve_bootmem_region sets the reserved flag on bootmem + * pages. + */ + VM_BUG_ON_PAGE(page_ref_count(page) != 2, page); + + if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) + put_page_bootmem(page); + else + VM_BUG_ON_PAGE(1, page); +} +#else +static inline void register_page_bootmem_info_node(struct pglist_data *pgdat) +{ +} + +static inline void put_page_bootmem(struct page *page) +{ +} + +static inline void get_page_bootmem(unsigned long info, struct page *page, + unsigned long type) +{ +} + +static inline void free_bootmem_page(struct page *page) +{ + free_reserved_page(page); +} +#endif + +#endif /* __LINUX_BOOTMEM_INFO_H */ diff --git a/include/linux/compat.h b/include/linux/compat.h index 8855b1b702b2..c270124e4402 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -532,8 +532,6 @@ int __compat_save_altstack(compat_stack_t __user *, unsigned long); &__uss->ss_sp, label); \ unsafe_put_user(t->sas_ss_flags, &__uss->ss_flags, label); \ unsafe_put_user(t->sas_ss_size, &__uss->ss_size, label); \ - if (t->sas_ss_flags & SS_AUTODISARM) \ - sas_ss_reset(t); \ } while (0); /* diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index adbe76b203e2..49b0ac8b6fd3 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -13,6 +13,12 @@ /* all clang versions usable with the kernel support KASAN ABI version 5 */ #define KASAN_ABI_VERSION 5 +/* + * Note: Checking __has_feature(*_sanitizer) is only true if the feature is + * enabled. Therefore it is not required to additionally check defined(CONFIG_*) + * to avoid adding redundant attributes in other configurations. + */ + #if __has_feature(address_sanitizer) || __has_feature(hwaddress_sanitizer) /* Emulate GCC's __SANITIZE_ADDRESS__ flag */ #define __SANITIZE_ADDRESS__ @@ -46,6 +52,17 @@ #endif /* + * Support for __has_feature(coverage_sanitizer) was added in Clang 13 together + * with no_sanitize("coverage"). Prior versions of Clang support coverage + * instrumentation, but cannot be queried for support by the preprocessor. + */ +#if __has_feature(coverage_sanitizer) +#define __no_sanitize_coverage __attribute__((no_sanitize("coverage"))) +#else +#define __no_sanitize_coverage +#endif + +/* * Not all versions of clang implement the type-generic versions * of the builtin overflow checkers. Fortunately, clang implements * __has_builtin allowing us to avoid awkward version diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 5d97ef738a57..cb9217fc60af 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -122,6 +122,12 @@ #define __no_sanitize_undefined #endif +#if defined(CONFIG_KCOV) && __has_attribute(__no_sanitize_coverage__) +#define __no_sanitize_coverage __attribute__((no_sanitize_coverage)) +#else +#define __no_sanitize_coverage +#endif + #if GCC_VERSION >= 50100 #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 #endif diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index d509169860f1..e4ea86fc584d 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -210,7 +210,7 @@ struct ftrace_likely_data { /* Section for code which can't be instrumented at all */ #define noinstr \ noinline notrace __attribute((__section__(".noinstr.text"))) \ - __no_kcsan __no_sanitize_address __no_profile + __no_kcsan __no_sanitize_address __no_profile __no_sanitize_coverage #endif /* __KERNEL__ */ diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 2a8ebe6c222e..f123e15d966e 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -10,8 +10,8 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf); int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr, - struct vm_area_struct *vma); -void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd); + struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma); +void huge_pmd_set_accessed(struct vm_fault *vmf); int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm, pud_t *dst_pud, pud_t *src_pud, unsigned long addr, struct vm_area_struct *vma); @@ -24,7 +24,7 @@ static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud) } #endif -vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd); +vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf); struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, unsigned int flags); @@ -115,9 +115,34 @@ extern struct kobj_attribute shmem_enabled_attr; extern unsigned long transparent_hugepage_flags; +static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, + unsigned long haddr) +{ + /* Don't have to check pgoff for anonymous vma */ + if (!vma_is_anonymous(vma)) { + if (!IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff, + HPAGE_PMD_NR)) + return false; + } + + if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end) + return false; + return true; +} + +static inline bool transhuge_vma_enabled(struct vm_area_struct *vma, + unsigned long vm_flags) +{ + /* Explicitly disabled through madvise. */ + if ((vm_flags & VM_NOHUGEPAGE) || + test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags)) + return false; + return true; +} + /* * to be used on vmas which are known to support THP. - * Use transparent_hugepage_enabled otherwise + * Use transparent_hugepage_active otherwise */ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma) { @@ -128,15 +153,12 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma) if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_NEVER_DAX)) return false; - if (vma->vm_flags & VM_NOHUGEPAGE) + if (!transhuge_vma_enabled(vma, vma->vm_flags)) return false; if (vma_is_temporary_stack(vma)) return false; - if (test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags)) - return false; - if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG)) return true; @@ -150,24 +172,7 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma) return false; } -bool transparent_hugepage_enabled(struct vm_area_struct *vma); - -#define HPAGE_CACHE_INDEX_MASK (HPAGE_PMD_NR - 1) - -static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, - unsigned long haddr) -{ - /* Don't have to check pgoff for anonymous vma */ - if (!vma_is_anonymous(vma)) { - if (((vma->vm_start >> PAGE_SHIFT) & HPAGE_CACHE_INDEX_MASK) != - (vma->vm_pgoff & HPAGE_CACHE_INDEX_MASK)) - return false; - } - - if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end) - return false; - return true; -} +bool transparent_hugepage_active(struct vm_area_struct *vma); #define transparent_hugepage_use_zero_page() \ (transparent_hugepage_flags & \ @@ -283,7 +288,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, pud_t *pud, int flags, struct dev_pagemap **pgmap); -vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd); +vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf); extern struct page *huge_zero_page; extern unsigned long huge_zero_pfn; @@ -354,7 +359,7 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma) return false; } -static inline bool transparent_hugepage_enabled(struct vm_area_struct *vma) +static inline bool transparent_hugepage_active(struct vm_area_struct *vma) { return false; } @@ -365,6 +370,12 @@ static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, return false; } +static inline bool transhuge_vma_enabled(struct vm_area_struct *vma, + unsigned long vm_flags) +{ + return false; +} + static inline void prep_transhuge_page(struct page *page) {} static inline bool is_transparent_hugepage(struct page *page) @@ -430,8 +441,7 @@ static inline spinlock_t *pud_trans_huge_lock(pud_t *pud, return NULL; } -static inline vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, - pmd_t orig_pmd) +static inline vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf) { return 0; } diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 8ba79dc64ab8..8e0f32f935bd 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -29,6 +29,23 @@ typedef struct { unsigned long pd; } hugepd_t; #include <linux/shm.h> #include <asm/tlbflush.h> +/* + * For HugeTLB page, there are more metadata to save in the struct page. But + * the head struct page cannot meet our needs, so we have to abuse other tail + * struct page to store the metadata. In order to avoid conflicts caused by + * subsequent use of more tail struct pages, we gather these discrete indexes + * of tail struct page here. + */ +enum { + SUBPAGE_INDEX_SUBPOOL = 1, /* reuse page->private */ +#ifdef CONFIG_CGROUP_HUGETLB + SUBPAGE_INDEX_CGROUP, /* reuse page->private */ + SUBPAGE_INDEX_CGROUP_RSVD, /* reuse page->private */ + __MAX_CGROUP_SUBPAGE_INDEX = SUBPAGE_INDEX_CGROUP_RSVD, +#endif + __NR_USED_SUBPAGE, +}; + struct hugepage_subpool { spinlock_t lock; long count; @@ -515,12 +532,14 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, * modifications require hugetlb_lock. * HPG_freed - Set when page is on the free lists. * Synchronization: hugetlb_lock held for examination and modification. + * HPG_vmemmap_optimized - Set when the vmemmap pages of the page are freed. */ enum hugetlb_page_flags { HPG_restore_reserve = 0, HPG_migratable, HPG_temporary, HPG_freed, + HPG_vmemmap_optimized, __NR_HPAGEFLAGS, }; @@ -566,6 +585,7 @@ HPAGEFLAG(RestoreReserve, restore_reserve) HPAGEFLAG(Migratable, migratable) HPAGEFLAG(Temporary, temporary) HPAGEFLAG(Freed, freed) +HPAGEFLAG(VmemmapOptimized, vmemmap_optimized) #ifdef CONFIG_HUGETLB_PAGE @@ -588,6 +608,9 @@ struct hstate { unsigned int nr_huge_pages_node[MAX_NUMNODES]; unsigned int free_huge_pages_node[MAX_NUMNODES]; unsigned int surplus_huge_pages_node[MAX_NUMNODES]; +#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP + unsigned int nr_free_vmemmap_pages; +#endif #ifdef CONFIG_CGROUP_HUGETLB /* cgroup control files */ struct cftype cgroup_files_dfl[7]; @@ -635,13 +658,13 @@ extern unsigned int default_hstate_idx; */ static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage) { - return (struct hugepage_subpool *)(hpage+1)->private; + return (void *)page_private(hpage + SUBPAGE_INDEX_SUBPOOL); } static inline void hugetlb_set_page_subpool(struct page *hpage, struct hugepage_subpool *subpool) { - set_page_private(hpage+1, (unsigned long)subpool); + set_page_private(hpage + SUBPAGE_INDEX_SUBPOOL, (unsigned long)subpool); } static inline struct hstate *hstate_file(struct file *f) @@ -718,8 +741,8 @@ static inline void arch_clear_hugepage_flags(struct page *page) { } #endif #ifndef arch_make_huge_pte -static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, - struct page *page, int writable) +static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, + vm_flags_t flags) { return entry; } @@ -875,6 +898,11 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, #else /* CONFIG_HUGETLB_PAGE */ struct hstate {}; +static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage) +{ + return NULL; +} + static inline int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list) { @@ -1028,6 +1056,12 @@ static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr } #endif /* CONFIG_HUGETLB_PAGE */ +#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP +extern bool hugetlb_free_vmemmap_enabled; +#else +#define hugetlb_free_vmemmap_enabled false +#endif + static inline spinlock_t *huge_pte_lock(struct hstate *h, struct mm_struct *mm, pte_t *pte) { diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index 0bff345c4bc6..0b8d1fdda3a1 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -21,15 +21,16 @@ struct hugetlb_cgroup; struct resv_map; struct file_region; +#ifdef CONFIG_CGROUP_HUGETLB /* * Minimum page order trackable by hugetlb cgroup. * At least 4 pages are necessary for all the tracking information. - * The second tail page (hpage[2]) is the fault usage cgroup. - * The third tail page (hpage[3]) is the reservation usage cgroup. + * The second tail page (hpage[SUBPAGE_INDEX_CGROUP]) is the fault + * usage cgroup. The third tail page (hpage[SUBPAGE_INDEX_CGROUP_RSVD]) + * is the reservation usage cgroup. */ -#define HUGETLB_CGROUP_MIN_ORDER 2 +#define HUGETLB_CGROUP_MIN_ORDER order_base_2(__MAX_CGROUP_SUBPAGE_INDEX + 1) -#ifdef CONFIG_CGROUP_HUGETLB enum hugetlb_memory_event { HUGETLB_MAX, HUGETLB_NR_MEMORY_EVENTS, @@ -66,9 +67,9 @@ __hugetlb_cgroup_from_page(struct page *page, bool rsvd) if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) return NULL; if (rsvd) - return (struct hugetlb_cgroup *)page[3].private; + return (void *)page_private(page + SUBPAGE_INDEX_CGROUP_RSVD); else - return (struct hugetlb_cgroup *)page[2].private; + return (void *)page_private(page + SUBPAGE_INDEX_CGROUP); } static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page) @@ -90,9 +91,11 @@ static inline int __set_hugetlb_cgroup(struct page *page, if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) return -1; if (rsvd) - page[3].private = (unsigned long)h_cg; + set_page_private(page + SUBPAGE_INDEX_CGROUP_RSVD, + (unsigned long)h_cg); else - page[2].private = (unsigned long)h_cg; + set_page_private(page + SUBPAGE_INDEX_CGROUP, + (unsigned long)h_cg); return 0; } diff --git a/include/linux/kcore.h b/include/linux/kcore.h index da676cdbd727..86c0f1d18998 100644 --- a/include/linux/kcore.h +++ b/include/linux/kcore.h @@ -11,14 +11,11 @@ enum kcore_type { KCORE_RAM, KCORE_VMEMMAP, KCORE_USER, - KCORE_OTHER, - KCORE_REMAP, }; struct kcore_list { struct list_head list; unsigned long addr; - unsigned long vaddr; size_t size; int type; }; diff --git a/include/linux/kernel.h b/include/linux/kernel.h index f2ad8a53f71f..1b2f0a7e00d6 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -10,10 +10,12 @@ #include <linux/types.h> #include <linux/compiler.h> #include <linux/bitops.h> +#include <linux/kstrtox.h> #include <linux/log2.h> #include <linux/math.h> #include <linux/minmax.h> #include <linux/typecheck.h> +#include <linux/panic.h> #include <linux/printk.h> #include <linux/build_bug.h> #include <linux/static_call_types.h> @@ -84,7 +86,6 @@ #define lower_16_bits(n) ((u16)((n) & 0xffff)) struct completion; -struct pt_regs; struct user; #ifdef CONFIG_PREEMPT_VOLUNTARY @@ -189,159 +190,9 @@ void __might_fault(const char *file, int line); static inline void might_fault(void) { } #endif -extern struct atomic_notifier_head panic_notifier_list; -extern long (*panic_blink)(int state); -__printf(1, 2) -void panic(const char *fmt, ...) __noreturn __cold; -void nmi_panic(struct pt_regs *regs, const char *msg); -extern void oops_enter(void); -extern void oops_exit(void); -extern bool oops_may_print(void); void do_exit(long error_code) __noreturn; void complete_and_exit(struct completion *, long) __noreturn; -/* Internal, do not use. */ -int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res); -int __must_check _kstrtol(const char *s, unsigned int base, long *res); - -int __must_check kstrtoull(const char *s, unsigned int base, unsigned long long *res); -int __must_check kstrtoll(const char *s, unsigned int base, long long *res); - -/** - * kstrtoul - convert a string to an unsigned long - * @s: The start of the string. The string must be null-terminated, and may also - * include a single newline before its terminating null. The first character - * may also be a plus sign, but not a minus sign. - * @base: The number base to use. The maximum supported base is 16. If base is - * given as 0, then the base of the string is automatically detected with the - * conventional semantics - If it begins with 0x the number will be parsed as a - * hexadecimal (case insensitive), if it otherwise begins with 0, it will be - * parsed as an octal number. Otherwise it will be parsed as a decimal. - * @res: Where to write the result of the conversion on success. - * - * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. - * Preferred over simple_strtoul(). Return code must be checked. -*/ -static inline int __must_check kstrtoul(const char *s, unsigned int base, unsigned long *res) -{ - /* - * We want to shortcut function call, but - * __builtin_types_compatible_p(unsigned long, unsigned long long) = 0. - */ - if (sizeof(unsigned long) == sizeof(unsigned long long) && - __alignof__(unsigned long) == __alignof__(unsigned long long)) - return kstrtoull(s, base, (unsigned long long *)res); - else - return _kstrtoul(s, base, res); -} - -/** - * kstrtol - convert a string to a long - * @s: The start of the string. The string must be null-terminated, and may also - * include a single newline before its terminating null. The first character - * may also be a plus sign or a minus sign. - * @base: The number base to use. The maximum supported base is 16. If base is - * given as 0, then the base of the string is automatically detected with the - * conventional semantics - If it begins with 0x the number will be parsed as a - * hexadecimal (case insensitive), if it otherwise begins with 0, it will be - * parsed as an octal number. Otherwise it will be parsed as a decimal. - * @res: Where to write the result of the conversion on success. - * - * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. - * Preferred over simple_strtol(). Return code must be checked. - */ -static inline int __must_check kstrtol(const char *s, unsigned int base, long *res) -{ - /* - * We want to shortcut function call, but - * __builtin_types_compatible_p(long, long long) = 0. - */ - if (sizeof(long) == sizeof(long long) && - __alignof__(long) == __alignof__(long long)) - return kstrtoll(s, base, (long long *)res); - else - return _kstrtol(s, base, res); -} - -int __must_check kstrtouint(const char *s, unsigned int base, unsigned int *res); -int __must_check kstrtoint(const char *s, unsigned int base, int *res); - -static inline int __must_check kstrtou64(const char *s, unsigned int base, u64 *res) -{ - return kstrtoull(s, base, res); -} - -static inline int __must_check kstrtos64(const char *s, unsigned int base, s64 *res) -{ - return kstrtoll(s, base, res); -} - -static inline int __must_check kstrtou32(const char *s, unsigned int base, u32 *res) -{ - return kstrtouint(s, base, res); -} - -static inline int __must_check kstrtos32(const char *s, unsigned int base, s32 *res) -{ - return kstrtoint(s, base, res); -} - -int __must_check kstrtou16(const char *s, unsigned int base, u16 *res); -int __must_check kstrtos16(const char *s, unsigned int base, s16 *res); -int __must_check kstrtou8(const char *s, unsigned int base, u8 *res); -int __must_check kstrtos8(const char *s, unsigned int base, s8 *res); -int __must_check kstrtobool(const char *s, bool *res); - -int __must_check kstrtoull_from_user(const char __user *s, size_t count, unsigned int base, unsigned long long *res); -int __must_check kstrtoll_from_user(const char __user *s, size_t count, unsigned int base, long long *res); -int __must_check kstrtoul_from_user(const char __user *s, size_t count, unsigned int base, unsigned long *res); -int __must_check kstrtol_from_user(const char __user *s, size_t count, unsigned int base, long *res); -int __must_check kstrtouint_from_user(const char __user *s, size_t count, unsigned int base, unsigned int *res); -int __must_check kstrtoint_from_user(const char __user *s, size_t count, unsigned int base, int *res); -int __must_check kstrtou16_from_user(const char __user *s, size_t count, unsigned int base, u16 *res); -int __must_check kstrtos16_from_user(const char __user *s, size_t count, unsigned int base, s16 *res); -int __must_check kstrtou8_from_user(const char __user *s, size_t count, unsigned int base, u8 *res); -int __must_check kstrtos8_from_user(const char __user *s, size_t count, unsigned int base, s8 *res); -int __must_check kstrtobool_from_user(const char __user *s, size_t count, bool *res); - -static inline int __must_check kstrtou64_from_user(const char __user *s, size_t count, unsigned int base, u64 *res) -{ - return kstrtoull_from_user(s, count, base, res); -} - -static inline int __must_check kstrtos64_from_user(const char __user *s, size_t count, unsigned int base, s64 *res) -{ - return kstrtoll_from_user(s, count, base, res); -} - -static inline int __must_check kstrtou32_from_user(const char __user *s, size_t count, unsigned int base, u32 *res) -{ - return kstrtouint_from_user(s, count, base, res); -} - -static inline int __must_check kstrtos32_from_user(const char __user *s, size_t count, unsigned int base, s32 *res) -{ - return kstrtoint_from_user(s, count, base, res); -} - -/* - * Use kstrto<foo> instead. - * - * NOTE: simple_strto<foo> does not check for the range overflow and, - * depending on the input, may give interesting results. - * - * Use these functions if and only if you cannot use kstrto<foo>, because - * the conversion ends on the first non-digit character, which may be far - * beyond the supported range. It might be useful to parse the strings like - * 10x50 or 12:21 without altering original string or temporary buffer in use. - * Keep in mind above caveat. - */ - -extern unsigned long simple_strtoul(const char *,char **,unsigned int); -extern long simple_strtol(const char *,char **,unsigned int); -extern unsigned long long simple_strtoull(const char *,char **,unsigned int); -extern long long simple_strtoll(const char *,char **,unsigned int); - extern int num_to_str(char *buf, int size, unsigned long long num, unsigned int width); @@ -384,52 +235,8 @@ extern int __kernel_text_address(unsigned long addr); extern int kernel_text_address(unsigned long addr); extern int func_ptr_is_kernel_text(void *ptr); -#ifdef CONFIG_SMP -extern unsigned int sysctl_oops_all_cpu_backtrace; -#else -#define sysctl_oops_all_cpu_backtrace 0 -#endif /* CONFIG_SMP */ - extern void bust_spinlocks(int yes); -extern int panic_timeout; -extern unsigned long panic_print; -extern int panic_on_oops; -extern int panic_on_unrecovered_nmi; -extern int panic_on_io_nmi; -extern int panic_on_warn; -extern unsigned long panic_on_taint; -extern bool panic_on_taint_nousertaint; -extern int sysctl_panic_on_rcu_stall; -extern int sysctl_max_rcu_stall_to_panic; -extern int sysctl_panic_on_stackoverflow; - -extern bool crash_kexec_post_notifiers; -/* - * panic_cpu is used for synchronizing panic() and crash_kexec() execution. It - * holds a CPU number which is executing panic() currently. A value of - * PANIC_CPU_INVALID means no CPU has entered panic() or crash_kexec(). - */ -extern atomic_t panic_cpu; -#define PANIC_CPU_INVALID -1 - -/* - * Only to be used by arch init code. If the user over-wrote the default - * CONFIG_PANIC_TIMEOUT, honor it. - */ -static inline void set_arch_panic_timeout(int timeout, int arch_default_timeout) -{ - if (panic_timeout == arch_default_timeout) - panic_timeout = timeout; -} -extern const char *print_tainted(void); -enum lockdep_ok { - LOCKDEP_STILL_OK, - LOCKDEP_NOW_UNRELIABLE -}; -extern void add_taint(unsigned flag, enum lockdep_ok); -extern int test_taint(unsigned flag); -extern unsigned long get_taint(void); extern int root_mountflags; extern bool early_boot_irqs_disabled; @@ -448,36 +255,6 @@ extern enum system_states { SYSTEM_SUSPEND, } system_state; -/* This cannot be an enum because some may be used in assembly source. */ -#define TAINT_PROPRIETARY_MODULE 0 -#define TAINT_FORCED_MODULE 1 -#define TAINT_CPU_OUT_OF_SPEC 2 -#define TAINT_FORCED_RMMOD 3 -#define TAINT_MACHINE_CHECK 4 -#define TAINT_BAD_PAGE 5 -#define TAINT_USER 6 -#define TAINT_DIE 7 -#define TAINT_OVERRIDDEN_ACPI_TABLE 8 -#define TAINT_WARN 9 -#define TAINT_CRAP 10 -#define TAINT_FIRMWARE_WORKAROUND 11 -#define TAINT_OOT_MODULE 12 -#define TAINT_UNSIGNED_MODULE 13 -#define TAINT_SOFTLOCKUP 14 -#define TAINT_LIVEPATCH 15 -#define TAINT_AUX 16 -#define TAINT_RANDSTRUCT 17 -#define TAINT_FLAGS_COUNT 18 -#define TAINT_FLAGS_MAX ((1UL << TAINT_FLAGS_COUNT) - 1) - -struct taint_flag { - char c_true; /* character printed when tainted */ - char c_false; /* character printed when not tainted */ - bool module; /* also show as a per-module taint flag */ -}; - -extern const struct taint_flag taint_flags[TAINT_FLAGS_COUNT]; - extern const char hex_asc[]; #define hex_asc_lo(x) hex_asc[((x) & 0x0f)] #define hex_asc_hi(x) hex_asc[((x) & 0xf0) >> 4] diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 523ffc7bc3a8..4d0c28c2ba12 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -399,7 +399,6 @@ int enable_kprobe(struct kprobe *kp); void dump_kprobe(struct kprobe *kp); void *alloc_insn_page(void); -void free_insn_page(void *page); int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *sym); diff --git a/include/linux/kstrtox.h b/include/linux/kstrtox.h new file mode 100644 index 000000000000..529974e22ea7 --- /dev/null +++ b/include/linux/kstrtox.h @@ -0,0 +1,155 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_KSTRTOX_H +#define _LINUX_KSTRTOX_H + +#include <linux/compiler.h> +#include <linux/types.h> + +/* Internal, do not use. */ +int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res); +int __must_check _kstrtol(const char *s, unsigned int base, long *res); + +int __must_check kstrtoull(const char *s, unsigned int base, unsigned long long *res); +int __must_check kstrtoll(const char *s, unsigned int base, long long *res); + +/** + * kstrtoul - convert a string to an unsigned long + * @s: The start of the string. The string must be null-terminated, and may also + * include a single newline before its terminating null. The first character + * may also be a plus sign, but not a minus sign. + * @base: The number base to use. The maximum supported base is 16. If base is + * given as 0, then the base of the string is automatically detected with the + * conventional semantics - If it begins with 0x the number will be parsed as a + * hexadecimal (case insensitive), if it otherwise begins with 0, it will be + * parsed as an octal number. Otherwise it will be parsed as a decimal. + * @res: Where to write the result of the conversion on success. + * + * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. + * Preferred over simple_strtoul(). Return code must be checked. +*/ +static inline int __must_check kstrtoul(const char *s, unsigned int base, unsigned long *res) +{ + /* + * We want to shortcut function call, but + * __builtin_types_compatible_p(unsigned long, unsigned long long) = 0. + */ + if (sizeof(unsigned long) == sizeof(unsigned long long) && + __alignof__(unsigned long) == __alignof__(unsigned long long)) + return kstrtoull(s, base, (unsigned long long *)res); + else + return _kstrtoul(s, base, res); +} + +/** + * kstrtol - convert a string to a long + * @s: The start of the string. The string must be null-terminated, and may also + * include a single newline before its terminating null. The first character + * may also be a plus sign or a minus sign. + * @base: The number base to use. The maximum supported base is 16. If base is + * given as 0, then the base of the string is automatically detected with the + * conventional semantics - If it begins with 0x the number will be parsed as a + * hexadecimal (case insensitive), if it otherwise begins with 0, it will be + * parsed as an octal number. Otherwise it will be parsed as a decimal. + * @res: Where to write the result of the conversion on success. + * + * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. + * Preferred over simple_strtol(). Return code must be checked. + */ +static inline int __must_check kstrtol(const char *s, unsigned int base, long *res) +{ + /* + * We want to shortcut function call, but + * __builtin_types_compatible_p(long, long long) = 0. + */ + if (sizeof(long) == sizeof(long long) && + __alignof__(long) == __alignof__(long long)) + return kstrtoll(s, base, (long long *)res); + else + return _kstrtol(s, base, res); +} + +int __must_check kstrtouint(const char *s, unsigned int base, unsigned int *res); +int __must_check kstrtoint(const char *s, unsigned int base, int *res); + +static inline int __must_check kstrtou64(const char *s, unsigned int base, u64 *res) +{ + return kstrtoull(s, base, res); +} + +static inline int __must_check kstrtos64(const char *s, unsigned int base, s64 *res) +{ + return kstrtoll(s, base, res); +} + +static inline int __must_check kstrtou32(const char *s, unsigned int base, u32 *res) +{ + return kstrtouint(s, base, res); +} + +static inline int __must_check kstrtos32(const char *s, unsigned int base, s32 *res) +{ + return kstrtoint(s, base, res); +} + +int __must_check kstrtou16(const char *s, unsigned int base, u16 *res); +int __must_check kstrtos16(const char *s, unsigned int base, s16 *res); +int __must_check kstrtou8(const char *s, unsigned int base, u8 *res); +int __must_check kstrtos8(const char *s, unsigned int base, s8 *res); +int __must_check kstrtobool(const char *s, bool *res); + +int __must_check kstrtoull_from_user(const char __user *s, size_t count, unsigned int base, unsigned long long *res); +int __must_check kstrtoll_from_user(const char __user *s, size_t count, unsigned int base, long long *res); +int __must_check kstrtoul_from_user(const char __user *s, size_t count, unsigned int base, unsigned long *res); +int __must_check kstrtol_from_user(const char __user *s, size_t count, unsigned int base, long *res); +int __must_check kstrtouint_from_user(const char __user *s, size_t count, unsigned int base, unsigned int *res); +int __must_check kstrtoint_from_user(const char __user *s, size_t count, unsigned int base, int *res); +int __must_check kstrtou16_from_user(const char __user *s, size_t count, unsigned int base, u16 *res); +int __must_check kstrtos16_from_user(const char __user *s, size_t count, unsigned int base, s16 *res); +int __must_check kstrtou8_from_user(const char __user *s, size_t count, unsigned int base, u8 *res); +int __must_check kstrtos8_from_user(const char __user *s, size_t count, unsigned int base, s8 *res); +int __must_check kstrtobool_from_user(const char __user *s, size_t count, bool *res); + +static inline int __must_check kstrtou64_from_user(const char __user *s, size_t count, unsigned int base, u64 *res) +{ + return kstrtoull_from_user(s, count, base, res); +} + +static inline int __must_check kstrtos64_from_user(const char __user *s, size_t count, unsigned int base, s64 *res) +{ + return kstrtoll_from_user(s, count, base, res); +} + +static inline int __must_check kstrtou32_from_user(const char __user *s, size_t count, unsigned int base, u32 *res) +{ + return kstrtouint_from_user(s, count, base, res); +} + +static inline int __must_check kstrtos32_from_user(const char __user *s, size_t count, unsigned int base, s32 *res) +{ + return kstrtoint_from_user(s, count, base, res); +} + +/* + * Use kstrto<foo> instead. + * + * NOTE: simple_strto<foo> does not check for the range overflow and, + * depending on the input, may give interesting results. + * + * Use these functions if and only if you cannot use kstrto<foo>, because + * the conversion ends on the first non-digit character, which may be far + * beyond the supported range. It might be useful to parse the strings like + * 10x50 or 12:21 without altering original string or temporary buffer in use. + * Keep in mind above caveat. + */ + +extern unsigned long simple_strtoul(const char *,char **,unsigned int); +extern long simple_strtol(const char *,char **,unsigned int); +extern unsigned long long simple_strtoull(const char *,char **,unsigned int); +extern long long simple_strtoll(const char *,char **,unsigned int); + +static inline int strtobool(const char *s, bool *res) +{ + return kstrtobool(s, res); +} + +#endif /* _LINUX_KSTRTOX_H */ diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 552309342c38..cbf46f56d105 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -30,7 +30,9 @@ extern unsigned long long max_possible_pfn; * @MEMBLOCK_NONE: no special request * @MEMBLOCK_HOTPLUG: hotpluggable region * @MEMBLOCK_MIRROR: mirrored region - * @MEMBLOCK_NOMAP: don't add to kernel direct mapping + * @MEMBLOCK_NOMAP: don't add to kernel direct mapping and treat as + * reserved in the memory map; refer to memblock_mark_nomap() description + * for further details */ enum memblock_flags { MEMBLOCK_NONE = 0x0, /* No special request */ diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 28f32fd00fe9..a7fd2c3ccb77 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -18,18 +18,6 @@ struct vmem_altmap; #ifdef CONFIG_MEMORY_HOTPLUG struct page *pfn_to_online_page(unsigned long pfn); -/* - * Types for free bootmem stored in page->lru.next. These have to be in - * some random range in unsigned long space for debugging purposes. - */ -enum { - MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE = 12, - SECTION_INFO = MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE, - MIX_SECTION_INFO, - NODE_INFO, - MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE = NODE_INFO, -}; - /* Types for control the zone type of onlined and offlined memory */ enum { /* Offline the memory. */ @@ -222,17 +210,6 @@ static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat) #endif /* CONFIG_NUMA */ #endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */ -#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE -extern void __init register_page_bootmem_info_node(struct pglist_data *pgdat); -#else -static inline void register_page_bootmem_info_node(struct pglist_data *pgdat) -{ -} -#endif -extern void put_page_bootmem(struct page *page); -extern void get_page_bootmem(unsigned long ingo, struct page *page, - unsigned long type); - void get_online_mems(void); void put_online_mems(void); @@ -260,10 +237,6 @@ static inline void zone_span_writelock(struct zone *zone) {} static inline void zone_span_writeunlock(struct zone *zone) {} static inline void zone_seqlock_init(struct zone *zone) {} -static inline void register_page_bootmem_info_node(struct pglist_data *pgdat) -{ -} - static inline int try_online_node(int nid) { return 0; diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 5f1c74df264d..0aaf91b496e2 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -46,11 +46,8 @@ struct mempolicy { atomic_t refcnt; unsigned short mode; /* See MPOL_* above */ unsigned short flags; /* See set_mempolicy() MPOL_F_* above */ - union { - short preferred_node; /* preferred */ - nodemask_t nodes; /* interleave/bind */ - /* undefined for default */ - } v; + nodemask_t nodes; /* interleave/bind/perfer */ + union { nodemask_t cpuset_mems_allowed; /* relative to these nodes */ nodemask_t user_nodemask; /* nodemask passed by user */ @@ -150,7 +147,7 @@ extern int huge_node(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol, nodemask_t **nodemask); extern bool init_nodemask_of_mempolicy(nodemask_t *mask); -extern bool mempolicy_nodemask_intersects(struct task_struct *tsk, +extern bool mempolicy_in_oom_domain(struct task_struct *tsk, const nodemask_t *mask); extern nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy); diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 45a79da89c5f..c0e9d35889e8 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -26,7 +26,7 @@ struct vmem_altmap { }; /* - * Specialize ZONE_DEVICE memory into multiple types each having differents + * Specialize ZONE_DEVICE memory into multiple types each has a different * usage. * * MEMORY_DEVICE_PRIVATE: diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 4bb4e519e3f5..9b7b7cd3bae9 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -51,6 +51,7 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page); extern int migrate_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page, int extra_count); +extern void copy_huge_page(struct page *dst, struct page *src); #else static inline void putback_movable_pages(struct list_head *l) {} @@ -77,6 +78,9 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping, return -ENOSYS; } +static inline void copy_huge_page(struct page *dst, struct page *src) +{ +} #endif /* CONFIG_MIGRATION */ #ifdef CONFIG_COMPACTION @@ -95,14 +99,9 @@ static inline void __ClearPageMovable(struct page *page) #endif #ifdef CONFIG_NUMA_BALANCING -extern bool pmd_trans_migrating(pmd_t pmd); extern int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, int node); #else -static inline bool pmd_trans_migrating(pmd_t pmd) -{ - return false; -} static inline int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, int node) { @@ -110,24 +109,6 @@ static inline int migrate_misplaced_page(struct page *page, } #endif /* CONFIG_NUMA_BALANCING */ -#if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE) -extern int migrate_misplaced_transhuge_page(struct mm_struct *mm, - struct vm_area_struct *vma, - pmd_t *pmd, pmd_t entry, - unsigned long address, - struct page *page, int node); -#else -static inline int migrate_misplaced_transhuge_page(struct mm_struct *mm, - struct vm_area_struct *vma, - pmd_t *pmd, pmd_t entry, - unsigned long address, - struct page *page, int node) -{ - return -EAGAIN; -} -#endif /* CONFIG_NUMA_BALANCING && CONFIG_TRANSPARENT_HUGEPAGE*/ - - #ifdef CONFIG_MIGRATION /* diff --git a/include/linux/mm.h b/include/linux/mm.h index b8bc39237dac..788a0b1323d0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -145,7 +145,7 @@ extern int mmap_rnd_compat_bits __read_mostly; /* This function must be updated when the size of struct page grows above 80 * or reduces below 56. The idea that compiler optimizes out switch() * statement, and only leaves move/store instructions. Also the compiler can - * combine write statments if they are both assignments and can be reordered, + * combine write statements if they are both assignments and can be reordered, * this can result in several of the writes here being dropped. */ #define mm_zero_struct_page(pp) __mm_zero_struct_page(pp) @@ -540,7 +540,12 @@ struct vm_fault { pud_t *pud; /* Pointer to pud entry matching * the 'address' */ - pte_t orig_pte; /* Value of PTE at the time of fault */ + union { + pte_t orig_pte; /* Value of PTE at the time of fault */ + pmd_t orig_pmd; /* Value of PMD at the time of fault, + * used by PMD fault only. + */ + }; struct page *cow_page; /* Page handler may use for COW fault */ struct page *page; /* ->fault handlers should return a @@ -3067,6 +3072,11 @@ static inline void print_vma_addr(char *prefix, unsigned long rip) } #endif +int vmemmap_remap_free(unsigned long start, unsigned long end, + unsigned long reuse); +int vmemmap_remap_alloc(unsigned long start, unsigned long end, + unsigned long reuse, gfp_t gfp_mask); + void *sparse_buffer_alloc(unsigned long size); struct page * __populate_section_memmap(unsigned long pfn, unsigned long nr_pages, int nid, struct vmem_altmap *altmap); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index d33d97c69da9..52bbd2b7cb46 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -404,7 +404,7 @@ struct mm_struct { unsigned long mmap_base; /* base of mmap area */ unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */ #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES - /* Base adresses for compatible mmap() */ + /* Base addresses for compatible mmap() */ unsigned long mmap_compat_base; unsigned long mmap_compat_legacy_base; #endif diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 1a6a9eb6d3fa..6692da8d121d 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -41,7 +41,12 @@ struct mmu_interval_notifier; * * @MMU_NOTIFY_MIGRATE: used during migrate_vma_collect() invalidate to signal * a device driver to possibly ignore the invalidation if the - * migrate_pgmap_owner field matches the driver's device private pgmap owner. + * owner field matches the driver's device private pgmap owner. + * + * @MMU_NOTIFY_EXCLUSIVE: to signal a device driver that the device will no + * longer have exclusive access to the page. When sent during creation of an + * exclusive range the owner will be initialised to the value provided by the + * caller of make_device_exclusive_range(), otherwise the owner will be NULL. */ enum mmu_notifier_event { MMU_NOTIFY_UNMAP = 0, @@ -51,6 +56,7 @@ enum mmu_notifier_event { MMU_NOTIFY_SOFT_DIRTY, MMU_NOTIFY_RELEASE, MMU_NOTIFY_MIGRATE, + MMU_NOTIFY_EXCLUSIVE, }; #define MMU_NOTIFIER_RANGE_BLOCKABLE (1 << 0) @@ -269,7 +275,7 @@ struct mmu_notifier_range { unsigned long end; unsigned flags; enum mmu_notifier_event event; - void *migrate_pgmap_owner; + void *owner; }; static inline int mm_has_notifiers(struct mm_struct *mm) @@ -521,14 +527,14 @@ static inline void mmu_notifier_range_init(struct mmu_notifier_range *range, range->flags = flags; } -static inline void mmu_notifier_range_init_migrate( - struct mmu_notifier_range *range, unsigned int flags, +static inline void mmu_notifier_range_init_owner( + struct mmu_notifier_range *range, + enum mmu_notifier_event event, unsigned int flags, struct vm_area_struct *vma, struct mm_struct *mm, - unsigned long start, unsigned long end, void *pgmap) + unsigned long start, unsigned long end, void *owner) { - mmu_notifier_range_init(range, MMU_NOTIFY_MIGRATE, flags, vma, mm, - start, end); - range->migrate_pgmap_owner = pgmap; + mmu_notifier_range_init(range, event, flags, vma, mm, start, end); + range->owner = owner; } #define ptep_clear_flush_young_notify(__vma, __address, __ptep) \ @@ -655,8 +661,8 @@ static inline void _mmu_notifier_range_init(struct mmu_notifier_range *range, #define mmu_notifier_range_init(range,event,flags,vma,mm,start,end) \ _mmu_notifier_range_init(range, start, end) -#define mmu_notifier_range_init_migrate(range, flags, vma, mm, start, end, \ - pgmap) \ +#define mmu_notifier_range_init_owner(range, event, flags, vma, mm, start, \ + end, owner) \ _mmu_notifier_range_init(range, start, end) static inline bool diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 265a32e1ff74..fcb535560028 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -114,7 +114,7 @@ static inline bool free_area_empty(struct free_area *area, int migratetype) struct pglist_data; /* - * Add a wild amount of padding here to ensure datas fall into separate + * Add a wild amount of padding here to ensure data fall into separate * cachelines. There are very few zone structures in the machine, so space * consumption is not a concern here. */ @@ -1064,7 +1064,10 @@ extern char numa_zonelist_order[]; #ifndef CONFIG_NUMA extern struct pglist_data contig_page_data; -#define NODE_DATA(nid) (&contig_page_data) +static inline struct pglist_data *NODE_DATA(int nid) +{ + return &contig_page_data; +} #define NODE_MEM_MAP(nid) mem_map #else /* CONFIG_NUMA */ @@ -1445,10 +1448,30 @@ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) #endif #ifndef CONFIG_HAVE_ARCH_PFN_VALID +/** + * pfn_valid - check if there is a valid memory map entry for a PFN + * @pfn: the page frame number to check + * + * Check if there is a valid memory map entry aka struct page for the @pfn. + * Note, that availability of the memory map entry does not imply that + * there is actual usable memory at that @pfn. The struct page may + * represent a hole or an unusable page frame. + * + * Return: 1 for PFNs that have memory map entries and 0 otherwise + */ static inline int pfn_valid(unsigned long pfn) { struct mem_section *ms; + /* + * Ensure the upper PAGE_SHIFT bits are clear in the + * pfn. Else it might lead to false positives when + * some of the upper bits are set, but the lower bits + * match a valid pfn. + */ + if (PHYS_PFN(PFN_PHYS(pfn)) != pfn) + return 0; + if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; ms = __nr_to_section(pfn_to_section_nr(pfn)); diff --git a/include/linux/mpi.h b/include/linux/mpi.h index 3e5358f4de2f..eb0d1c1db208 100644 --- a/include/linux/mpi.h +++ b/include/linux/mpi.h @@ -200,7 +200,7 @@ struct mpi_ec_ctx { unsigned int nbits; /* Number of bits. */ /* Domain parameters. Note that they may not all be set and if set - * the MPIs may be flaged as constant. + * the MPIs may be flagged as constant. */ MPI p; /* Prime specifying the field GF(p). */ MPI a; /* First coefficient of the Weierstrass equation. */ @@ -267,7 +267,7 @@ int mpi_ec_curve_point(MPI_POINT point, struct mpi_ec_ctx *ctx); /** * mpi_get_size() - returns max size required to store the number * - * @a: A multi precision integer for which we want to allocate a bufer + * @a: A multi precision integer for which we want to allocate a buffer * * Return: size required to store the number */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 458696550028..5922031ffab6 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -704,6 +704,18 @@ PAGEFLAG_FALSE(DoubleMap) #endif /* + * Check if a page is currently marked HWPoisoned. Note that this check is + * best effort only and inherently racy: there is no way to synchronize with + * failing hardware. + */ +static inline bool is_page_hwpoison(struct page *page) +{ + if (PageHWPoison(page)) + return true; + return PageHuge(page) && PageHWPoison(compound_head(page)); +} + +/* * For pages that are never mapped to userspace (and aren't PageSlab), * page_type may be used. Because it is initialised to -1, we invert the * sense of the bit, so __SetPageFoo *clears* the bit used for PageFoo, and @@ -766,9 +778,19 @@ PAGE_TYPE_OPS(Buddy, buddy) * relies on this feature is aware that re-onlining the memory block will * require to re-set the pages PageOffline() and not giving them to the * buddy via online_page_callback_t. + * + * There are drivers that mark a page PageOffline() and expect there won't be + * any further access to page content. PFN walkers that read content of random + * pages should check PageOffline() and synchronize with such drivers using + * page_offline_freeze()/page_offline_thaw(). */ PAGE_TYPE_OPS(Offline, offline) +extern void page_offline_freeze(void); +extern void page_offline_thaw(void); +extern void page_offline_begin(void); +extern void page_offline_end(void); + /* * Marks pages in use as page tables. */ diff --git a/include/linux/panic.h b/include/linux/panic.h new file mode 100644 index 000000000000..f5844908a089 --- /dev/null +++ b/include/linux/panic.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_PANIC_H +#define _LINUX_PANIC_H + +#include <linux/compiler_attributes.h> +#include <linux/types.h> + +struct pt_regs; + +extern long (*panic_blink)(int state); +__printf(1, 2) +void panic(const char *fmt, ...) __noreturn __cold; +void nmi_panic(struct pt_regs *regs, const char *msg); +extern void oops_enter(void); +extern void oops_exit(void); +extern bool oops_may_print(void); + +#ifdef CONFIG_SMP +extern unsigned int sysctl_oops_all_cpu_backtrace; +#else +#define sysctl_oops_all_cpu_backtrace 0 +#endif /* CONFIG_SMP */ + +extern int panic_timeout; +extern unsigned long panic_print; +extern int panic_on_oops; +extern int panic_on_unrecovered_nmi; +extern int panic_on_io_nmi; +extern int panic_on_warn; + +extern unsigned long panic_on_taint; +extern bool panic_on_taint_nousertaint; + +extern int sysctl_panic_on_rcu_stall; +extern int sysctl_max_rcu_stall_to_panic; +extern int sysctl_panic_on_stackoverflow; + +extern bool crash_kexec_post_notifiers; + +/* + * panic_cpu is used for synchronizing panic() and crash_kexec() execution. It + * holds a CPU number which is executing panic() currently. A value of + * PANIC_CPU_INVALID means no CPU has entered panic() or crash_kexec(). + */ +extern atomic_t panic_cpu; +#define PANIC_CPU_INVALID -1 + +/* + * Only to be used by arch init code. If the user over-wrote the default + * CONFIG_PANIC_TIMEOUT, honor it. + */ +static inline void set_arch_panic_timeout(int timeout, int arch_default_timeout) +{ + if (panic_timeout == arch_default_timeout) + panic_timeout = timeout; +} + +/* This cannot be an enum because some may be used in assembly source. */ +#define TAINT_PROPRIETARY_MODULE 0 +#define TAINT_FORCED_MODULE 1 +#define TAINT_CPU_OUT_OF_SPEC 2 +#define TAINT_FORCED_RMMOD 3 +#define TAINT_MACHINE_CHECK 4 +#define TAINT_BAD_PAGE 5 +#define TAINT_USER 6 +#define TAINT_DIE 7 +#define TAINT_OVERRIDDEN_ACPI_TABLE 8 +#define TAINT_WARN 9 +#define TAINT_CRAP 10 +#define TAINT_FIRMWARE_WORKAROUND 11 +#define TAINT_OOT_MODULE 12 +#define TAINT_UNSIGNED_MODULE 13 +#define TAINT_SOFTLOCKUP 14 +#define TAINT_LIVEPATCH 15 +#define TAINT_AUX 16 +#define TAINT_RANDSTRUCT 17 +#define TAINT_FLAGS_COUNT 18 +#define TAINT_FLAGS_MAX ((1UL << TAINT_FLAGS_COUNT) - 1) + +struct taint_flag { + char c_true; /* character printed when tainted */ + char c_false; /* character printed when not tainted */ + bool module; /* also show as a per-module taint flag */ +}; + +extern const struct taint_flag taint_flags[TAINT_FLAGS_COUNT]; + +enum lockdep_ok { + LOCKDEP_STILL_OK, + LOCKDEP_NOW_UNRELIABLE, +}; + +extern const char *print_tainted(void); +extern void add_taint(unsigned flag, enum lockdep_ok); +extern int test_taint(unsigned flag); +extern unsigned long get_taint(void); + +#endif /* _LINUX_PANIC_H */ diff --git a/include/linux/panic_notifier.h b/include/linux/panic_notifier.h new file mode 100644 index 000000000000..41e32483d7a7 --- /dev/null +++ b/include/linux/panic_notifier.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_PANIC_NOTIFIERS_H +#define _LINUX_PANIC_NOTIFIERS_H + +#include <linux/notifier.h> +#include <linux/types.h> + +extern struct atomic_notifier_head panic_notifier_list; + +extern bool crash_kexec_post_notifiers; + +#endif /* _LINUX_PANIC_NOTIFIERS_H */ diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index c32600c9e1ad..e82660f7b9e4 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -29,6 +29,24 @@ #endif /* + * This defines the first usable user address. Platforms + * can override its value with custom FIRST_USER_ADDRESS + * defined in their respective <asm/pgtable.h>. + */ +#ifndef FIRST_USER_ADDRESS +#define FIRST_USER_ADDRESS 0UL +#endif + +/* + * This defines the generic helper for accessing PMD page + * table page. Although platforms can still override this + * via their respective <asm/pgtable.h>. + */ +#ifndef pmd_pgtable +#define pmd_pgtable(pmd) pmd_page(pmd) +#endif + +/* * A page table page can be thought of an array like this: pXd_t[PTRS_PER_PxD] * * The pXx_index() functions return the index of the entry in the page @@ -1379,10 +1397,34 @@ static inline int p4d_clear_huge(p4d_t *p4d) } #endif /* !__PAGETABLE_P4D_FOLDED */ +#ifndef __PAGETABLE_PUD_FOLDED int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot); -int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot); int pud_clear_huge(pud_t *pud); +#else +static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) +{ + return 0; +} +static inline int pud_clear_huge(pud_t *pud) +{ + return 0; +} +#endif /* !__PAGETABLE_PUD_FOLDED */ + +#ifndef __PAGETABLE_PMD_FOLDED +int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot); int pmd_clear_huge(pmd_t *pmd); +#else +static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) +{ + return 0; +} +static inline int pmd_clear_huge(pmd_t *pmd) +{ + return 0; +} +#endif /* !__PAGETABLE_PMD_FOLDED */ + int p4d_free_pud_page(p4d_t *p4d, unsigned long addr); int pud_free_pmd_page(pud_t *pud, unsigned long addr); int pmd_free_pte_page(pmd_t *pmd, unsigned long addr); diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 8d04e7deedc6..83fb86133fe1 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -86,9 +86,6 @@ struct anon_vma_chain { }; enum ttu_flags { - TTU_MIGRATION = 0x1, /* migration mode */ - TTU_MUNLOCK = 0x2, /* munlock mode */ - TTU_SPLIT_HUGE_PMD = 0x4, /* split huge PMD if any */ TTU_IGNORE_MLOCK = 0x8, /* ignore mlock */ TTU_SYNC = 0x10, /* avoid racy checks with PVMW_SYNC */ @@ -98,7 +95,6 @@ enum ttu_flags { * do a final flush if necessary */ TTU_RMAP_LOCKED = 0x80, /* do not grab rmap lock: * caller holds it */ - TTU_SPLIT_FREEZE = 0x100, /* freeze pte under splitting thp */ }; #ifdef CONFIG_MMU @@ -195,7 +191,12 @@ static inline void page_dup_rmap(struct page *page, bool compound) int page_referenced(struct page *, int is_locked, struct mem_cgroup *memcg, unsigned long *vm_flags); -bool try_to_unmap(struct page *, enum ttu_flags flags); +void try_to_migrate(struct page *page, enum ttu_flags flags); +void try_to_unmap(struct page *, enum ttu_flags flags); + +int make_device_exclusive_range(struct mm_struct *mm, unsigned long start, + unsigned long end, struct page **pages, + void *arg); /* Avoid racy checks */ #define PVMW_SYNC (1 << 0) @@ -240,7 +241,7 @@ int page_mkclean(struct page *); * called in munlock()/munmap() path to check for other vmas holding * the page mlocked. */ -void try_to_munlock(struct page *); +void page_mlock(struct page *page); void remove_migration_ptes(struct page *old, struct page *new, bool locked); diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 723b1fa1177e..dd99569595fd 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -126,8 +126,16 @@ void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num void seq_put_hex_ll(struct seq_file *m, const char *delimiter, unsigned long long v, unsigned int width); +void seq_escape_mem(struct seq_file *m, const char *src, size_t len, + unsigned int flags, const char *esc); + +static inline void seq_escape_str(struct seq_file *m, const char *src, + unsigned int flags, const char *esc) +{ + seq_escape_mem(m, src, strlen(src), flags, esc); +} + void seq_escape(struct seq_file *m, const char *s, const char *esc); -void seq_escape_mem_ascii(struct seq_file *m, const char *src, size_t isz); void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index aa77dcd1646f..8e775ce517bb 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -122,21 +122,18 @@ static inline bool shmem_file(struct file *file) extern bool shmem_charge(struct inode *inode, long pages); extern void shmem_uncharge(struct inode *inode, long pages); +#ifdef CONFIG_USERFAULTFD #ifdef CONFIG_SHMEM -extern int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, +extern int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, struct vm_area_struct *dst_vma, unsigned long dst_addr, unsigned long src_addr, + bool zeropage, struct page **pagep); -extern int shmem_mfill_zeropage_pte(struct mm_struct *dst_mm, - pmd_t *dst_pmd, - struct vm_area_struct *dst_vma, - unsigned long dst_addr); -#else -#define shmem_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \ - src_addr, pagep) ({ BUG(); 0; }) -#define shmem_mfill_zeropage_pte(dst_mm, dst_pmd, dst_vma, \ - dst_addr) ({ BUG(); 0; }) -#endif +#else /* !CONFIG_SHMEM */ +#define shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr, \ + src_addr, zeropage, pagep) ({ BUG(); 0; }) +#endif /* CONFIG_SHMEM */ +#endif /* CONFIG_USERFAULTFD */ #endif diff --git a/include/linux/signal.h b/include/linux/signal.h index 5160fd45e5ca..3454c7ff0778 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -462,8 +462,6 @@ int __save_altstack(stack_t __user *, unsigned long); unsafe_put_user((void __user *)t->sas_ss_sp, &__uss->ss_sp, label); \ unsafe_put_user(t->sas_ss_flags, &__uss->ss_flags, label); \ unsafe_put_user(t->sas_ss_size, &__uss->ss_size, label); \ - if (t->sas_ss_flags & SS_AUTODISARM) \ - sas_ss_reset(t); \ } while (0); #ifdef CONFIG_PROC_FS diff --git a/include/linux/string.h b/include/linux/string.h index 9521d8cab18e..b48d2d28e0b1 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -2,7 +2,6 @@ #ifndef _LINUX_STRING_H_ #define _LINUX_STRING_H_ - #include <linux/compiler.h> /* for inline */ #include <linux/types.h> /* for size_t */ #include <linux/stddef.h> /* for NULL */ @@ -184,12 +183,6 @@ extern char **argv_split(gfp_t gfp, const char *str, int *argcp); extern void argv_free(char **argv); extern bool sysfs_streq(const char *s1, const char *s2); -extern int kstrtobool(const char *s, bool *res); -static inline int strtobool(const char *s, bool *res) -{ - return kstrtobool(s, res); -} - int match_string(const char * const *array, size_t n, const char *string); int __sysfs_match_string(const char * const *array, size_t n, const char *s); diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index fa06dcdc481e..68189c4a2eb1 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -2,6 +2,7 @@ #ifndef _LINUX_STRING_HELPERS_H_ #define _LINUX_STRING_HELPERS_H_ +#include <linux/bits.h> #include <linux/ctype.h> #include <linux/types.h> @@ -18,13 +19,15 @@ enum string_size_units { void string_get_size(u64 size, u64 blk_size, enum string_size_units units, char *buf, int len); -#define UNESCAPE_SPACE 0x01 -#define UNESCAPE_OCTAL 0x02 -#define UNESCAPE_HEX 0x04 -#define UNESCAPE_SPECIAL 0x08 +#define UNESCAPE_SPACE BIT(0) +#define UNESCAPE_OCTAL BIT(1) +#define UNESCAPE_HEX BIT(2) +#define UNESCAPE_SPECIAL BIT(3) #define UNESCAPE_ANY \ (UNESCAPE_SPACE | UNESCAPE_OCTAL | UNESCAPE_HEX | UNESCAPE_SPECIAL) +#define UNESCAPE_ALL_MASK GENMASK(3, 0) + int string_unescape(char *src, char *dst, size_t size, unsigned int flags); static inline int string_unescape_inplace(char *buf, unsigned int flags) @@ -42,22 +45,24 @@ static inline int string_unescape_any_inplace(char *buf) return string_unescape_any(buf, buf, 0); } -#define ESCAPE_SPACE 0x01 -#define ESCAPE_SPECIAL 0x02 -#define ESCAPE_NULL 0x04 -#define ESCAPE_OCTAL 0x08 +#define ESCAPE_SPACE BIT(0) +#define ESCAPE_SPECIAL BIT(1) +#define ESCAPE_NULL BIT(2) +#define ESCAPE_OCTAL BIT(3) #define ESCAPE_ANY \ (ESCAPE_SPACE | ESCAPE_OCTAL | ESCAPE_SPECIAL | ESCAPE_NULL) -#define ESCAPE_NP 0x10 +#define ESCAPE_NP BIT(4) #define ESCAPE_ANY_NP (ESCAPE_ANY | ESCAPE_NP) -#define ESCAPE_HEX 0x20 +#define ESCAPE_HEX BIT(5) +#define ESCAPE_NA BIT(6) +#define ESCAPE_NAP BIT(7) +#define ESCAPE_APPEND BIT(8) + +#define ESCAPE_ALL_MASK GENMASK(8, 0) int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz, unsigned int flags, const char *only); -int string_escape_mem_ascii(const char *src, size_t isz, char *dst, - size_t osz); - static inline int string_escape_mem_any_np(const char *src, size_t isz, char *dst, size_t osz, const char *only) { diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index d0965e2997b0..b134b2b3371c 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -14,6 +14,7 @@ #include <linux/kref.h> #include <linux/slab.h> #include <linux/atomic.h> +#include <linux/kstrtox.h> #include <linux/proc_fs.h> /* diff --git a/include/linux/swap.h b/include/linux/swap.h index 49b1dd2c100b..6f5a43251593 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -62,12 +62,17 @@ static inline int current_is_kswapd(void) * migrate part of a process memory to device memory. * * When a page is migrated from CPU to device, we set the CPU page table entry - * to a special SWP_DEVICE_* entry. + * to a special SWP_DEVICE_{READ|WRITE} entry. + * + * When a page is mapped by the device for exclusive access we set the CPU page + * table entries to special SWP_DEVICE_EXCLUSIVE_* entries. */ #ifdef CONFIG_DEVICE_PRIVATE -#define SWP_DEVICE_NUM 2 +#define SWP_DEVICE_NUM 4 #define SWP_DEVICE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM) #define SWP_DEVICE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+1) +#define SWP_DEVICE_EXCLUSIVE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+2) +#define SWP_DEVICE_EXCLUSIVE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+3) #else #define SWP_DEVICE_NUM 0 #endif @@ -537,7 +542,11 @@ static inline void put_swap_device(struct swap_info_struct *si) { } -#define swap_address_space(entry) (NULL) +static inline struct address_space *swap_address_space(swp_entry_t entry) +{ + return NULL; +} + #define get_nr_swap_pages() 0L #define total_swap_pages 0L #define total_swapcache_pages() 0UL @@ -560,8 +569,8 @@ static inline void show_swap_cache_info(void) { } -#define free_swap_and_cache(e) ({(is_migration_entry(e) || is_device_private_entry(e));}) -#define swapcache_prepare(e) ({(is_migration_entry(e) || is_device_private_entry(e));}) +/* used to sanity check ptes in zap_pte_range when CONFIG_SWAP=0 */ +#define free_swap_and_cache(e) is_pfn_swap_entry(e) static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask) { diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 5907205c712c..d356ab4047f7 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -107,10 +107,14 @@ static inline void *swp_to_radix_entry(swp_entry_t entry) } #if IS_ENABLED(CONFIG_DEVICE_PRIVATE) -static inline swp_entry_t make_device_private_entry(struct page *page, bool write) +static inline swp_entry_t make_readable_device_private_entry(pgoff_t offset) { - return swp_entry(write ? SWP_DEVICE_WRITE : SWP_DEVICE_READ, - page_to_pfn(page)); + return swp_entry(SWP_DEVICE_READ, offset); +} + +static inline swp_entry_t make_writable_device_private_entry(pgoff_t offset) +{ + return swp_entry(SWP_DEVICE_WRITE, offset); } static inline bool is_device_private_entry(swp_entry_t entry) @@ -119,33 +123,40 @@ static inline bool is_device_private_entry(swp_entry_t entry) return type == SWP_DEVICE_READ || type == SWP_DEVICE_WRITE; } -static inline void make_device_private_entry_read(swp_entry_t *entry) +static inline bool is_writable_device_private_entry(swp_entry_t entry) { - *entry = swp_entry(SWP_DEVICE_READ, swp_offset(*entry)); + return unlikely(swp_type(entry) == SWP_DEVICE_WRITE); } -static inline bool is_write_device_private_entry(swp_entry_t entry) +static inline swp_entry_t make_readable_device_exclusive_entry(pgoff_t offset) { - return unlikely(swp_type(entry) == SWP_DEVICE_WRITE); + return swp_entry(SWP_DEVICE_EXCLUSIVE_READ, offset); } -static inline unsigned long device_private_entry_to_pfn(swp_entry_t entry) +static inline swp_entry_t make_writable_device_exclusive_entry(pgoff_t offset) { - return swp_offset(entry); + return swp_entry(SWP_DEVICE_EXCLUSIVE_WRITE, offset); +} + +static inline bool is_device_exclusive_entry(swp_entry_t entry) +{ + return swp_type(entry) == SWP_DEVICE_EXCLUSIVE_READ || + swp_type(entry) == SWP_DEVICE_EXCLUSIVE_WRITE; } -static inline struct page *device_private_entry_to_page(swp_entry_t entry) +static inline bool is_writable_device_exclusive_entry(swp_entry_t entry) { - return pfn_to_page(swp_offset(entry)); + return unlikely(swp_type(entry) == SWP_DEVICE_EXCLUSIVE_WRITE); } #else /* CONFIG_DEVICE_PRIVATE */ -static inline swp_entry_t make_device_private_entry(struct page *page, bool write) +static inline swp_entry_t make_readable_device_private_entry(pgoff_t offset) { return swp_entry(0, 0); } -static inline void make_device_private_entry_read(swp_entry_t *entry) +static inline swp_entry_t make_writable_device_private_entry(pgoff_t offset) { + return swp_entry(0, 0); } static inline bool is_device_private_entry(swp_entry_t entry) @@ -153,61 +164,52 @@ static inline bool is_device_private_entry(swp_entry_t entry) return false; } -static inline bool is_write_device_private_entry(swp_entry_t entry) +static inline bool is_writable_device_private_entry(swp_entry_t entry) { return false; } -static inline unsigned long device_private_entry_to_pfn(swp_entry_t entry) +static inline swp_entry_t make_readable_device_exclusive_entry(pgoff_t offset) { - return 0; + return swp_entry(0, 0); } -static inline struct page *device_private_entry_to_page(swp_entry_t entry) +static inline swp_entry_t make_writable_device_exclusive_entry(pgoff_t offset) { - return NULL; + return swp_entry(0, 0); } -#endif /* CONFIG_DEVICE_PRIVATE */ -#ifdef CONFIG_MIGRATION -static inline swp_entry_t make_migration_entry(struct page *page, int write) +static inline bool is_device_exclusive_entry(swp_entry_t entry) { - BUG_ON(!PageLocked(compound_head(page))); + return false; +} - return swp_entry(write ? SWP_MIGRATION_WRITE : SWP_MIGRATION_READ, - page_to_pfn(page)); +static inline bool is_writable_device_exclusive_entry(swp_entry_t entry) +{ + return false; } +#endif /* CONFIG_DEVICE_PRIVATE */ +#ifdef CONFIG_MIGRATION static inline int is_migration_entry(swp_entry_t entry) { return unlikely(swp_type(entry) == SWP_MIGRATION_READ || swp_type(entry) == SWP_MIGRATION_WRITE); } -static inline int is_write_migration_entry(swp_entry_t entry) +static inline int is_writable_migration_entry(swp_entry_t entry) { return unlikely(swp_type(entry) == SWP_MIGRATION_WRITE); } -static inline unsigned long migration_entry_to_pfn(swp_entry_t entry) +static inline swp_entry_t make_readable_migration_entry(pgoff_t offset) { - return swp_offset(entry); + return swp_entry(SWP_MIGRATION_READ, offset); } -static inline struct page *migration_entry_to_page(swp_entry_t entry) +static inline swp_entry_t make_writable_migration_entry(pgoff_t offset) { - struct page *p = pfn_to_page(swp_offset(entry)); - /* - * Any use of migration entries may only occur while the - * corresponding page is locked - */ - BUG_ON(!PageLocked(compound_head(p))); - return p; -} - -static inline void make_migration_entry_read(swp_entry_t *entry) -{ - *entry = swp_entry(SWP_MIGRATION_READ, swp_offset(*entry)); + return swp_entry(SWP_MIGRATION_WRITE, offset); } extern void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, @@ -217,37 +219,58 @@ extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, extern void migration_entry_wait_huge(struct vm_area_struct *vma, struct mm_struct *mm, pte_t *pte); #else - -#define make_migration_entry(page, write) swp_entry(0, 0) -static inline int is_migration_entry(swp_entry_t swp) +static inline swp_entry_t make_readable_migration_entry(pgoff_t offset) { - return 0; + return swp_entry(0, 0); } -static inline unsigned long migration_entry_to_pfn(swp_entry_t entry) +static inline swp_entry_t make_writable_migration_entry(pgoff_t offset) { - return 0; + return swp_entry(0, 0); } -static inline struct page *migration_entry_to_page(swp_entry_t entry) +static inline int is_migration_entry(swp_entry_t swp) { - return NULL; + return 0; } -static inline void make_migration_entry_read(swp_entry_t *entryp) { } static inline void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, spinlock_t *ptl) { } static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address) { } static inline void migration_entry_wait_huge(struct vm_area_struct *vma, struct mm_struct *mm, pte_t *pte) { } -static inline int is_write_migration_entry(swp_entry_t entry) +static inline int is_writable_migration_entry(swp_entry_t entry) { return 0; } #endif +static inline struct page *pfn_swap_entry_to_page(swp_entry_t entry) +{ + struct page *p = pfn_to_page(swp_offset(entry)); + + /* + * Any use of migration entries may only occur while the + * corresponding page is locked + */ + BUG_ON(is_migration_entry(entry) && !PageLocked(p)); + + return p; +} + +/* + * A pfn swap entry is a special type of swap entry that always has a pfn stored + * in the swap offset. They are used to represent unaddressable device memory + * and to restrict access to a page undergoing migration. + */ +static inline bool is_pfn_swap_entry(swp_entry_t entry) +{ + return is_migration_entry(entry) || is_device_private_entry(entry) || + is_device_exclusive_entry(entry); +} + struct page_vma_mapped_walk; #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION @@ -265,6 +288,8 @@ static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd) if (pmd_swp_soft_dirty(pmd)) pmd = pmd_swp_clear_soft_dirty(pmd); + if (pmd_swp_uffd_wp(pmd)) + pmd = pmd_swp_clear_uffd_wp(pmd); arch_entry = __pmd_to_swp_entry(pmd); return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry)); } diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 157762db9d4b..0999f6317978 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -9,6 +9,7 @@ #define _LINUX_THREAD_INFO_H #include <linux/types.h> +#include <linux/limits.h> #include <linux/bug.h> #include <linux/restart_block.h> #include <linux/errno.h> diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 794d1538b8ba..331d2ccf0bcc 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -53,6 +53,11 @@ enum mcopy_atomic_mode { MCOPY_ATOMIC_CONTINUE, }; +extern int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, + struct vm_area_struct *dst_vma, + unsigned long dst_addr, struct page *page, + bool newly_allocated, bool wp_copy); + extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start, unsigned long src_start, unsigned long len, bool *mmap_changing, __u64 mode); diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index bfaaf0b6fa76..1dabd6f22486 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -104,6 +104,21 @@ static inline bool arch_vmap_pmd_supported(pgprot_t prot) } #endif +#ifndef arch_vmap_pte_range_map_size +static inline unsigned long arch_vmap_pte_range_map_size(unsigned long addr, unsigned long end, + u64 pfn, unsigned int max_page_shift) +{ + return PAGE_SIZE; +} +#endif + +#ifndef arch_vmap_pte_supported_shift +static inline int arch_vmap_pte_supported_shift(unsigned long size) +{ + return PAGE_SHIFT; +} +#endif + /* * Highlevel APIs for driver use */ diff --git a/include/linux/zbud.h b/include/linux/zbud.h deleted file mode 100644 index b1eaf6e31735..000000000000 --- a/include/linux/zbud.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ZBUD_H_ -#define _ZBUD_H_ - -#include <linux/types.h> - -struct zbud_pool; - -struct zbud_ops { - int (*evict)(struct zbud_pool *pool, unsigned long handle); -}; - -struct zbud_pool *zbud_create_pool(gfp_t gfp, const struct zbud_ops *ops); -void zbud_destroy_pool(struct zbud_pool *pool); -int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp, - unsigned long *handle); -void zbud_free(struct zbud_pool *pool, unsigned long handle); -int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries); -void *zbud_map(struct zbud_pool *pool, unsigned long handle); -void zbud_unmap(struct zbud_pool *pool, unsigned long handle); -u64 zbud_get_pool_size(struct zbud_pool *pool); - -#endif /* _ZBUD_H_ */ diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index 00d1180527d8..88faf2400ec2 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -423,47 +423,6 @@ TRACE_EVENT(mm_vmscan_lru_shrink_active, show_reclaim_flags(__entry->reclaim_flags)) ); -TRACE_EVENT(mm_vmscan_inactive_list_is_low, - - TP_PROTO(int nid, int reclaim_idx, - unsigned long total_inactive, unsigned long inactive, - unsigned long total_active, unsigned long active, - unsigned long ratio, int file), - - TP_ARGS(nid, reclaim_idx, total_inactive, inactive, total_active, active, ratio, file), - - TP_STRUCT__entry( - __field(int, nid) - __field(int, reclaim_idx) - __field(unsigned long, total_inactive) - __field(unsigned long, inactive) - __field(unsigned long, total_active) - __field(unsigned long, active) - __field(unsigned long, ratio) - __field(int, reclaim_flags) - ), - - TP_fast_assign( - __entry->nid = nid; - __entry->reclaim_idx = reclaim_idx; - __entry->total_inactive = total_inactive; - __entry->inactive = inactive; - __entry->total_active = total_active; - __entry->active = active; - __entry->ratio = ratio; - __entry->reclaim_flags = trace_reclaim_flags(file) & - RECLAIM_WB_LRU; - ), - - TP_printk("nid=%d reclaim_idx=%d total_inactive=%ld inactive=%ld total_active=%ld active=%ld ratio=%ld flags=%s", - __entry->nid, - __entry->reclaim_idx, - __entry->total_inactive, __entry->inactive, - __entry->total_active, __entry->active, - __entry->ratio, - show_reclaim_flags(__entry->reclaim_flags)) -); - TRACE_EVENT(mm_vmscan_node_reclaim_begin, TP_PROTO(int nid, int order, gfp_t gfp_flags), diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h index f94f65d429be..1567a3294c3d 100644 --- a/include/uapi/asm-generic/mman-common.h +++ b/include/uapi/asm-generic/mman-common.h @@ -72,6 +72,9 @@ #define MADV_COLD 20 /* deactivate these pages */ #define MADV_PAGEOUT 21 /* reclaim these pages */ +#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */ +#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h index 4832fd0b5642..19a00bc7fe86 100644 --- a/include/uapi/linux/mempolicy.h +++ b/include/uapi/linux/mempolicy.h @@ -60,7 +60,6 @@ enum { * are never OR'ed into the mode in mempolicy API arguments. */ #define MPOL_F_SHARED (1 << 0) /* identify shared policies */ -#define MPOL_F_LOCAL (1 << 1) /* preferred local allocation */ #define MPOL_F_MOF (1 << 3) /* this policy wants migrate on fault */ #define MPOL_F_MORON (1 << 4) /* Migrate On protnone Reference On Node */ diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index 650480f41f1d..05b31d60acf6 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -31,7 +31,8 @@ UFFD_FEATURE_MISSING_SHMEM | \ UFFD_FEATURE_SIGBUS | \ UFFD_FEATURE_THREAD_ID | \ - UFFD_FEATURE_MINOR_HUGETLBFS) + UFFD_FEATURE_MINOR_HUGETLBFS | \ + UFFD_FEATURE_MINOR_SHMEM) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -185,6 +186,9 @@ struct uffdio_api { * UFFD_FEATURE_MINOR_HUGETLBFS indicates that minor faults * can be intercepted (via REGISTER_MODE_MINOR) for * hugetlbfs-backed pages. + * + * UFFD_FEATURE_MINOR_SHMEM indicates the same support as + * UFFD_FEATURE_MINOR_HUGETLBFS, but for shmem-backed pages instead. */ #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) #define UFFD_FEATURE_EVENT_FORK (1<<1) @@ -196,6 +200,7 @@ struct uffdio_api { #define UFFD_FEATURE_SIGBUS (1<<7) #define UFFD_FEATURE_THREAD_ID (1<<8) #define UFFD_FEATURE_MINOR_HUGETLBFS (1<<9) +#define UFFD_FEATURE_MINOR_SHMEM (1<<10) __u64 features; __u64 ioctls; |