summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-07-02 21:08:10 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2021-07-02 21:08:10 +0200
commit71bd9341011f626d692aabe024f099820f02c497 (patch)
treea1c27fd8f17daff36e380800c5b69769d0d9cc99 /include
parentMerge branch 'for-5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/c... (diff)
parentipc/util.c: use binary search for max_idx (diff)
downloadlinux-71bd9341011f626d692aabe024f099820f02c497.tar.xz
linux-71bd9341011f626d692aabe024f099820f02c497.zip
Merge branch 'akpm' (patches from Andrew)
Merge more updates from Andrew Morton: "190 patches. Subsystems affected by this patch series: mm (hugetlb, userfaultfd, vmscan, kconfig, proc, z3fold, zbud, ras, mempolicy, memblock, migration, thp, nommu, kconfig, madvise, memory-hotplug, zswap, zsmalloc, zram, cleanups, kfence, and hmm), procfs, sysctl, misc, core-kernel, lib, lz4, checkpatch, init, kprobes, nilfs2, hfs, signals, exec, kcov, selftests, compress/decompress, and ipc" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (190 commits) ipc/util.c: use binary search for max_idx ipc/sem.c: use READ_ONCE()/WRITE_ONCE() for use_global_lock ipc: use kmalloc for msg_queue and shmid_kernel ipc sem: use kvmalloc for sem_undo allocation lib/decompressors: remove set but not used variabled 'level' selftests/vm/pkeys: exercise x86 XSAVE init state selftests/vm/pkeys: refill shadow register after implicit kernel write selftests/vm/pkeys: handle negative sys_pkey_alloc() return code selftests/vm/pkeys: fix alloc_random_pkey() to make it really, really random kcov: add __no_sanitize_coverage to fix noinstr for all architectures exec: remove checks in __register_bimfmt() x86: signal: don't do sas_ss_reset() until we are certain that sigframe won't be abandoned hfsplus: report create_date to kstat.btime hfsplus: remove unnecessary oom message nilfs2: remove redundant continue statement in a while-loop kprobes: remove duplicated strong free_insn_page in x86 and s390 init: print out unknown kernel parameters checkpatch: do not complain about positive return values starting with EPOLL checkpatch: improve the indented label test checkpatch: scripts/spdxcheck.py now requires python3 ...
Diffstat (limited to 'include')
-rw-r--r--include/asm-generic/bug.h3
-rw-r--r--include/linux/ascii85.h3
-rw-r--r--include/linux/bootmem_info.h66
-rw-r--r--include/linux/compat.h2
-rw-r--r--include/linux/compiler-clang.h17
-rw-r--r--include/linux/compiler-gcc.h6
-rw-r--r--include/linux/compiler_types.h2
-rw-r--r--include/linux/huge_mm.h70
-rw-r--r--include/linux/hugetlb.h42
-rw-r--r--include/linux/hugetlb_cgroup.h19
-rw-r--r--include/linux/kcore.h3
-rw-r--r--include/linux/kernel.h227
-rw-r--r--include/linux/kprobes.h1
-rw-r--r--include/linux/kstrtox.h155
-rw-r--r--include/linux/memblock.h4
-rw-r--r--include/linux/memory_hotplug.h27
-rw-r--r--include/linux/mempolicy.h9
-rw-r--r--include/linux/memremap.h2
-rw-r--r--include/linux/migrate.h27
-rw-r--r--include/linux/mm.h14
-rw-r--r--include/linux/mm_types.h2
-rw-r--r--include/linux/mmu_notifier.h26
-rw-r--r--include/linux/mmzone.h27
-rw-r--r--include/linux/mpi.h4
-rw-r--r--include/linux/page-flags.h22
-rw-r--r--include/linux/panic.h98
-rw-r--r--include/linux/panic_notifier.h12
-rw-r--r--include/linux/pgtable.h44
-rw-r--r--include/linux/rmap.h13
-rw-r--r--include/linux/seq_file.h10
-rw-r--r--include/linux/shmem_fs.h19
-rw-r--r--include/linux/signal.h2
-rw-r--r--include/linux/string.h7
-rw-r--r--include/linux/string_helpers.h31
-rw-r--r--include/linux/sunrpc/cache.h1
-rw-r--r--include/linux/swap.h19
-rw-r--r--include/linux/swapops.h125
-rw-r--r--include/linux/thread_info.h1
-rw-r--r--include/linux/userfaultfd_k.h5
-rw-r--r--include/linux/vmalloc.h15
-rw-r--r--include/linux/zbud.h23
-rw-r--r--include/trace/events/vmscan.h41
-rw-r--r--include/uapi/asm-generic/mman-common.h3
-rw-r--r--include/uapi/linux/mempolicy.h1
-rw-r--r--include/uapi/linux/userfaultfd.h7
45 files changed, 744 insertions, 513 deletions
diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index bafc51f483c4..edb0e2a602a8 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -18,7 +18,8 @@
#endif
#ifndef __ASSEMBLY__
-#include <linux/kernel.h>
+#include <linux/panic.h>
+#include <linux/printk.h>
#ifdef CONFIG_BUG
diff --git a/include/linux/ascii85.h b/include/linux/ascii85.h
index 4cc40201273e..83ad775ad0aa 100644
--- a/include/linux/ascii85.h
+++ b/include/linux/ascii85.h
@@ -8,7 +8,8 @@
#ifndef _ASCII85_H_
#define _ASCII85_H_
-#include <linux/kernel.h>
+#include <linux/math.h>
+#include <linux/types.h>
#define ASCII85_BUFSZ 6
diff --git a/include/linux/bootmem_info.h b/include/linux/bootmem_info.h
new file mode 100644
index 000000000000..2bc8b1f69c93
--- /dev/null
+++ b/include/linux/bootmem_info.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_BOOTMEM_INFO_H
+#define __LINUX_BOOTMEM_INFO_H
+
+#include <linux/mm.h>
+
+/*
+ * Types for free bootmem stored in page->lru.next. These have to be in
+ * some random range in unsigned long space for debugging purposes.
+ */
+enum {
+ MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE = 12,
+ SECTION_INFO = MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE,
+ MIX_SECTION_INFO,
+ NODE_INFO,
+ MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE = NODE_INFO,
+};
+
+#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE
+void __init register_page_bootmem_info_node(struct pglist_data *pgdat);
+
+void get_page_bootmem(unsigned long info, struct page *page,
+ unsigned long type);
+void put_page_bootmem(struct page *page);
+
+/*
+ * Any memory allocated via the memblock allocator and not via the
+ * buddy will be marked reserved already in the memmap. For those
+ * pages, we can call this function to free it to buddy allocator.
+ */
+static inline void free_bootmem_page(struct page *page)
+{
+ unsigned long magic = (unsigned long)page->freelist;
+
+ /*
+ * The reserve_bootmem_region sets the reserved flag on bootmem
+ * pages.
+ */
+ VM_BUG_ON_PAGE(page_ref_count(page) != 2, page);
+
+ if (magic == SECTION_INFO || magic == MIX_SECTION_INFO)
+ put_page_bootmem(page);
+ else
+ VM_BUG_ON_PAGE(1, page);
+}
+#else
+static inline void register_page_bootmem_info_node(struct pglist_data *pgdat)
+{
+}
+
+static inline void put_page_bootmem(struct page *page)
+{
+}
+
+static inline void get_page_bootmem(unsigned long info, struct page *page,
+ unsigned long type)
+{
+}
+
+static inline void free_bootmem_page(struct page *page)
+{
+ free_reserved_page(page);
+}
+#endif
+
+#endif /* __LINUX_BOOTMEM_INFO_H */
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 8855b1b702b2..c270124e4402 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -532,8 +532,6 @@ int __compat_save_altstack(compat_stack_t __user *, unsigned long);
&__uss->ss_sp, label); \
unsafe_put_user(t->sas_ss_flags, &__uss->ss_flags, label); \
unsafe_put_user(t->sas_ss_size, &__uss->ss_size, label); \
- if (t->sas_ss_flags & SS_AUTODISARM) \
- sas_ss_reset(t); \
} while (0);
/*
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index adbe76b203e2..49b0ac8b6fd3 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -13,6 +13,12 @@
/* all clang versions usable with the kernel support KASAN ABI version 5 */
#define KASAN_ABI_VERSION 5
+/*
+ * Note: Checking __has_feature(*_sanitizer) is only true if the feature is
+ * enabled. Therefore it is not required to additionally check defined(CONFIG_*)
+ * to avoid adding redundant attributes in other configurations.
+ */
+
#if __has_feature(address_sanitizer) || __has_feature(hwaddress_sanitizer)
/* Emulate GCC's __SANITIZE_ADDRESS__ flag */
#define __SANITIZE_ADDRESS__
@@ -46,6 +52,17 @@
#endif
/*
+ * Support for __has_feature(coverage_sanitizer) was added in Clang 13 together
+ * with no_sanitize("coverage"). Prior versions of Clang support coverage
+ * instrumentation, but cannot be queried for support by the preprocessor.
+ */
+#if __has_feature(coverage_sanitizer)
+#define __no_sanitize_coverage __attribute__((no_sanitize("coverage")))
+#else
+#define __no_sanitize_coverage
+#endif
+
+/*
* Not all versions of clang implement the type-generic versions
* of the builtin overflow checkers. Fortunately, clang implements
* __has_builtin allowing us to avoid awkward version
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 5d97ef738a57..cb9217fc60af 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -122,6 +122,12 @@
#define __no_sanitize_undefined
#endif
+#if defined(CONFIG_KCOV) && __has_attribute(__no_sanitize_coverage__)
+#define __no_sanitize_coverage __attribute__((no_sanitize_coverage))
+#else
+#define __no_sanitize_coverage
+#endif
+
#if GCC_VERSION >= 50100
#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
#endif
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index d509169860f1..e4ea86fc584d 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -210,7 +210,7 @@ struct ftrace_likely_data {
/* Section for code which can't be instrumented at all */
#define noinstr \
noinline notrace __attribute((__section__(".noinstr.text"))) \
- __no_kcsan __no_sanitize_address __no_profile
+ __no_kcsan __no_sanitize_address __no_profile __no_sanitize_coverage
#endif /* __KERNEL__ */
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 2a8ebe6c222e..f123e15d966e 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -10,8 +10,8 @@
vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf);
int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
- struct vm_area_struct *vma);
-void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd);
+ struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma);
+void huge_pmd_set_accessed(struct vm_fault *vmf);
int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pud_t *dst_pud, pud_t *src_pud, unsigned long addr,
struct vm_area_struct *vma);
@@ -24,7 +24,7 @@ static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
}
#endif
-vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd);
+vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf);
struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmd,
unsigned int flags);
@@ -115,9 +115,34 @@ extern struct kobj_attribute shmem_enabled_attr;
extern unsigned long transparent_hugepage_flags;
+static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
+ unsigned long haddr)
+{
+ /* Don't have to check pgoff for anonymous vma */
+ if (!vma_is_anonymous(vma)) {
+ if (!IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
+ HPAGE_PMD_NR))
+ return false;
+ }
+
+ if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
+ return false;
+ return true;
+}
+
+static inline bool transhuge_vma_enabled(struct vm_area_struct *vma,
+ unsigned long vm_flags)
+{
+ /* Explicitly disabled through madvise. */
+ if ((vm_flags & VM_NOHUGEPAGE) ||
+ test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
+ return false;
+ return true;
+}
+
/*
* to be used on vmas which are known to support THP.
- * Use transparent_hugepage_enabled otherwise
+ * Use transparent_hugepage_active otherwise
*/
static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
{
@@ -128,15 +153,12 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_NEVER_DAX))
return false;
- if (vma->vm_flags & VM_NOHUGEPAGE)
+ if (!transhuge_vma_enabled(vma, vma->vm_flags))
return false;
if (vma_is_temporary_stack(vma))
return false;
- if (test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
- return false;
-
if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG))
return true;
@@ -150,24 +172,7 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
return false;
}
-bool transparent_hugepage_enabled(struct vm_area_struct *vma);
-
-#define HPAGE_CACHE_INDEX_MASK (HPAGE_PMD_NR - 1)
-
-static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
- unsigned long haddr)
-{
- /* Don't have to check pgoff for anonymous vma */
- if (!vma_is_anonymous(vma)) {
- if (((vma->vm_start >> PAGE_SHIFT) & HPAGE_CACHE_INDEX_MASK) !=
- (vma->vm_pgoff & HPAGE_CACHE_INDEX_MASK))
- return false;
- }
-
- if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
- return false;
- return true;
-}
+bool transparent_hugepage_active(struct vm_area_struct *vma);
#define transparent_hugepage_use_zero_page() \
(transparent_hugepage_flags & \
@@ -283,7 +288,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
pud_t *pud, int flags, struct dev_pagemap **pgmap);
-vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
+vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf);
extern struct page *huge_zero_page;
extern unsigned long huge_zero_pfn;
@@ -354,7 +359,7 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
return false;
}
-static inline bool transparent_hugepage_enabled(struct vm_area_struct *vma)
+static inline bool transparent_hugepage_active(struct vm_area_struct *vma)
{
return false;
}
@@ -365,6 +370,12 @@ static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
return false;
}
+static inline bool transhuge_vma_enabled(struct vm_area_struct *vma,
+ unsigned long vm_flags)
+{
+ return false;
+}
+
static inline void prep_transhuge_page(struct page *page) {}
static inline bool is_transparent_hugepage(struct page *page)
@@ -430,8 +441,7 @@ static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
return NULL;
}
-static inline vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf,
- pmd_t orig_pmd)
+static inline vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
{
return 0;
}
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 8ba79dc64ab8..8e0f32f935bd 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -29,6 +29,23 @@ typedef struct { unsigned long pd; } hugepd_t;
#include <linux/shm.h>
#include <asm/tlbflush.h>
+/*
+ * For HugeTLB page, there are more metadata to save in the struct page. But
+ * the head struct page cannot meet our needs, so we have to abuse other tail
+ * struct page to store the metadata. In order to avoid conflicts caused by
+ * subsequent use of more tail struct pages, we gather these discrete indexes
+ * of tail struct page here.
+ */
+enum {
+ SUBPAGE_INDEX_SUBPOOL = 1, /* reuse page->private */
+#ifdef CONFIG_CGROUP_HUGETLB
+ SUBPAGE_INDEX_CGROUP, /* reuse page->private */
+ SUBPAGE_INDEX_CGROUP_RSVD, /* reuse page->private */
+ __MAX_CGROUP_SUBPAGE_INDEX = SUBPAGE_INDEX_CGROUP_RSVD,
+#endif
+ __NR_USED_SUBPAGE,
+};
+
struct hugepage_subpool {
spinlock_t lock;
long count;
@@ -515,12 +532,14 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
* modifications require hugetlb_lock.
* HPG_freed - Set when page is on the free lists.
* Synchronization: hugetlb_lock held for examination and modification.
+ * HPG_vmemmap_optimized - Set when the vmemmap pages of the page are freed.
*/
enum hugetlb_page_flags {
HPG_restore_reserve = 0,
HPG_migratable,
HPG_temporary,
HPG_freed,
+ HPG_vmemmap_optimized,
__NR_HPAGEFLAGS,
};
@@ -566,6 +585,7 @@ HPAGEFLAG(RestoreReserve, restore_reserve)
HPAGEFLAG(Migratable, migratable)
HPAGEFLAG(Temporary, temporary)
HPAGEFLAG(Freed, freed)
+HPAGEFLAG(VmemmapOptimized, vmemmap_optimized)
#ifdef CONFIG_HUGETLB_PAGE
@@ -588,6 +608,9 @@ struct hstate {
unsigned int nr_huge_pages_node[MAX_NUMNODES];
unsigned int free_huge_pages_node[MAX_NUMNODES];
unsigned int surplus_huge_pages_node[MAX_NUMNODES];
+#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
+ unsigned int nr_free_vmemmap_pages;
+#endif
#ifdef CONFIG_CGROUP_HUGETLB
/* cgroup control files */
struct cftype cgroup_files_dfl[7];
@@ -635,13 +658,13 @@ extern unsigned int default_hstate_idx;
*/
static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage)
{
- return (struct hugepage_subpool *)(hpage+1)->private;
+ return (void *)page_private(hpage + SUBPAGE_INDEX_SUBPOOL);
}
static inline void hugetlb_set_page_subpool(struct page *hpage,
struct hugepage_subpool *subpool)
{
- set_page_private(hpage+1, (unsigned long)subpool);
+ set_page_private(hpage + SUBPAGE_INDEX_SUBPOOL, (unsigned long)subpool);
}
static inline struct hstate *hstate_file(struct file *f)
@@ -718,8 +741,8 @@ static inline void arch_clear_hugepage_flags(struct page *page) { }
#endif
#ifndef arch_make_huge_pte
-static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
- struct page *page, int writable)
+static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift,
+ vm_flags_t flags)
{
return entry;
}
@@ -875,6 +898,11 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
#else /* CONFIG_HUGETLB_PAGE */
struct hstate {};
+static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage)
+{
+ return NULL;
+}
+
static inline int isolate_or_dissolve_huge_page(struct page *page,
struct list_head *list)
{
@@ -1028,6 +1056,12 @@ static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr
}
#endif /* CONFIG_HUGETLB_PAGE */
+#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
+extern bool hugetlb_free_vmemmap_enabled;
+#else
+#define hugetlb_free_vmemmap_enabled false
+#endif
+
static inline spinlock_t *huge_pte_lock(struct hstate *h,
struct mm_struct *mm, pte_t *pte)
{
diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index 0bff345c4bc6..0b8d1fdda3a1 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -21,15 +21,16 @@ struct hugetlb_cgroup;
struct resv_map;
struct file_region;
+#ifdef CONFIG_CGROUP_HUGETLB
/*
* Minimum page order trackable by hugetlb cgroup.
* At least 4 pages are necessary for all the tracking information.
- * The second tail page (hpage[2]) is the fault usage cgroup.
- * The third tail page (hpage[3]) is the reservation usage cgroup.
+ * The second tail page (hpage[SUBPAGE_INDEX_CGROUP]) is the fault
+ * usage cgroup. The third tail page (hpage[SUBPAGE_INDEX_CGROUP_RSVD])
+ * is the reservation usage cgroup.
*/
-#define HUGETLB_CGROUP_MIN_ORDER 2
+#define HUGETLB_CGROUP_MIN_ORDER order_base_2(__MAX_CGROUP_SUBPAGE_INDEX + 1)
-#ifdef CONFIG_CGROUP_HUGETLB
enum hugetlb_memory_event {
HUGETLB_MAX,
HUGETLB_NR_MEMORY_EVENTS,
@@ -66,9 +67,9 @@ __hugetlb_cgroup_from_page(struct page *page, bool rsvd)
if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER)
return NULL;
if (rsvd)
- return (struct hugetlb_cgroup *)page[3].private;
+ return (void *)page_private(page + SUBPAGE_INDEX_CGROUP_RSVD);
else
- return (struct hugetlb_cgroup *)page[2].private;
+ return (void *)page_private(page + SUBPAGE_INDEX_CGROUP);
}
static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page)
@@ -90,9 +91,11 @@ static inline int __set_hugetlb_cgroup(struct page *page,
if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER)
return -1;
if (rsvd)
- page[3].private = (unsigned long)h_cg;
+ set_page_private(page + SUBPAGE_INDEX_CGROUP_RSVD,
+ (unsigned long)h_cg);
else
- page[2].private = (unsigned long)h_cg;
+ set_page_private(page + SUBPAGE_INDEX_CGROUP,
+ (unsigned long)h_cg);
return 0;
}
diff --git a/include/linux/kcore.h b/include/linux/kcore.h
index da676cdbd727..86c0f1d18998 100644
--- a/include/linux/kcore.h
+++ b/include/linux/kcore.h
@@ -11,14 +11,11 @@ enum kcore_type {
KCORE_RAM,
KCORE_VMEMMAP,
KCORE_USER,
- KCORE_OTHER,
- KCORE_REMAP,
};
struct kcore_list {
struct list_head list;
unsigned long addr;
- unsigned long vaddr;
size_t size;
int type;
};
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index f2ad8a53f71f..1b2f0a7e00d6 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -10,10 +10,12 @@
#include <linux/types.h>
#include <linux/compiler.h>
#include <linux/bitops.h>
+#include <linux/kstrtox.h>
#include <linux/log2.h>
#include <linux/math.h>
#include <linux/minmax.h>
#include <linux/typecheck.h>
+#include <linux/panic.h>
#include <linux/printk.h>
#include <linux/build_bug.h>
#include <linux/static_call_types.h>
@@ -84,7 +86,6 @@
#define lower_16_bits(n) ((u16)((n) & 0xffff))
struct completion;
-struct pt_regs;
struct user;
#ifdef CONFIG_PREEMPT_VOLUNTARY
@@ -189,159 +190,9 @@ void __might_fault(const char *file, int line);
static inline void might_fault(void) { }
#endif
-extern struct atomic_notifier_head panic_notifier_list;
-extern long (*panic_blink)(int state);
-__printf(1, 2)
-void panic(const char *fmt, ...) __noreturn __cold;
-void nmi_panic(struct pt_regs *regs, const char *msg);
-extern void oops_enter(void);
-extern void oops_exit(void);
-extern bool oops_may_print(void);
void do_exit(long error_code) __noreturn;
void complete_and_exit(struct completion *, long) __noreturn;
-/* Internal, do not use. */
-int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res);
-int __must_check _kstrtol(const char *s, unsigned int base, long *res);
-
-int __must_check kstrtoull(const char *s, unsigned int base, unsigned long long *res);
-int __must_check kstrtoll(const char *s, unsigned int base, long long *res);
-
-/**
- * kstrtoul - convert a string to an unsigned long
- * @s: The start of the string. The string must be null-terminated, and may also
- * include a single newline before its terminating null. The first character
- * may also be a plus sign, but not a minus sign.
- * @base: The number base to use. The maximum supported base is 16. If base is
- * given as 0, then the base of the string is automatically detected with the
- * conventional semantics - If it begins with 0x the number will be parsed as a
- * hexadecimal (case insensitive), if it otherwise begins with 0, it will be
- * parsed as an octal number. Otherwise it will be parsed as a decimal.
- * @res: Where to write the result of the conversion on success.
- *
- * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
- * Preferred over simple_strtoul(). Return code must be checked.
-*/
-static inline int __must_check kstrtoul(const char *s, unsigned int base, unsigned long *res)
-{
- /*
- * We want to shortcut function call, but
- * __builtin_types_compatible_p(unsigned long, unsigned long long) = 0.
- */
- if (sizeof(unsigned long) == sizeof(unsigned long long) &&
- __alignof__(unsigned long) == __alignof__(unsigned long long))
- return kstrtoull(s, base, (unsigned long long *)res);
- else
- return _kstrtoul(s, base, res);
-}
-
-/**
- * kstrtol - convert a string to a long
- * @s: The start of the string. The string must be null-terminated, and may also
- * include a single newline before its terminating null. The first character
- * may also be a plus sign or a minus sign.
- * @base: The number base to use. The maximum supported base is 16. If base is
- * given as 0, then the base of the string is automatically detected with the
- * conventional semantics - If it begins with 0x the number will be parsed as a
- * hexadecimal (case insensitive), if it otherwise begins with 0, it will be
- * parsed as an octal number. Otherwise it will be parsed as a decimal.
- * @res: Where to write the result of the conversion on success.
- *
- * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
- * Preferred over simple_strtol(). Return code must be checked.
- */
-static inline int __must_check kstrtol(const char *s, unsigned int base, long *res)
-{
- /*
- * We want to shortcut function call, but
- * __builtin_types_compatible_p(long, long long) = 0.
- */
- if (sizeof(long) == sizeof(long long) &&
- __alignof__(long) == __alignof__(long long))
- return kstrtoll(s, base, (long long *)res);
- else
- return _kstrtol(s, base, res);
-}
-
-int __must_check kstrtouint(const char *s, unsigned int base, unsigned int *res);
-int __must_check kstrtoint(const char *s, unsigned int base, int *res);
-
-static inline int __must_check kstrtou64(const char *s, unsigned int base, u64 *res)
-{
- return kstrtoull(s, base, res);
-}
-
-static inline int __must_check kstrtos64(const char *s, unsigned int base, s64 *res)
-{
- return kstrtoll(s, base, res);
-}
-
-static inline int __must_check kstrtou32(const char *s, unsigned int base, u32 *res)
-{
- return kstrtouint(s, base, res);
-}
-
-static inline int __must_check kstrtos32(const char *s, unsigned int base, s32 *res)
-{
- return kstrtoint(s, base, res);
-}
-
-int __must_check kstrtou16(const char *s, unsigned int base, u16 *res);
-int __must_check kstrtos16(const char *s, unsigned int base, s16 *res);
-int __must_check kstrtou8(const char *s, unsigned int base, u8 *res);
-int __must_check kstrtos8(const char *s, unsigned int base, s8 *res);
-int __must_check kstrtobool(const char *s, bool *res);
-
-int __must_check kstrtoull_from_user(const char __user *s, size_t count, unsigned int base, unsigned long long *res);
-int __must_check kstrtoll_from_user(const char __user *s, size_t count, unsigned int base, long long *res);
-int __must_check kstrtoul_from_user(const char __user *s, size_t count, unsigned int base, unsigned long *res);
-int __must_check kstrtol_from_user(const char __user *s, size_t count, unsigned int base, long *res);
-int __must_check kstrtouint_from_user(const char __user *s, size_t count, unsigned int base, unsigned int *res);
-int __must_check kstrtoint_from_user(const char __user *s, size_t count, unsigned int base, int *res);
-int __must_check kstrtou16_from_user(const char __user *s, size_t count, unsigned int base, u16 *res);
-int __must_check kstrtos16_from_user(const char __user *s, size_t count, unsigned int base, s16 *res);
-int __must_check kstrtou8_from_user(const char __user *s, size_t count, unsigned int base, u8 *res);
-int __must_check kstrtos8_from_user(const char __user *s, size_t count, unsigned int base, s8 *res);
-int __must_check kstrtobool_from_user(const char __user *s, size_t count, bool *res);
-
-static inline int __must_check kstrtou64_from_user(const char __user *s, size_t count, unsigned int base, u64 *res)
-{
- return kstrtoull_from_user(s, count, base, res);
-}
-
-static inline int __must_check kstrtos64_from_user(const char __user *s, size_t count, unsigned int base, s64 *res)
-{
- return kstrtoll_from_user(s, count, base, res);
-}
-
-static inline int __must_check kstrtou32_from_user(const char __user *s, size_t count, unsigned int base, u32 *res)
-{
- return kstrtouint_from_user(s, count, base, res);
-}
-
-static inline int __must_check kstrtos32_from_user(const char __user *s, size_t count, unsigned int base, s32 *res)
-{
- return kstrtoint_from_user(s, count, base, res);
-}
-
-/*
- * Use kstrto<foo> instead.
- *
- * NOTE: simple_strto<foo> does not check for the range overflow and,
- * depending on the input, may give interesting results.
- *
- * Use these functions if and only if you cannot use kstrto<foo>, because
- * the conversion ends on the first non-digit character, which may be far
- * beyond the supported range. It might be useful to parse the strings like
- * 10x50 or 12:21 without altering original string or temporary buffer in use.
- * Keep in mind above caveat.
- */
-
-extern unsigned long simple_strtoul(const char *,char **,unsigned int);
-extern long simple_strtol(const char *,char **,unsigned int);
-extern unsigned long long simple_strtoull(const char *,char **,unsigned int);
-extern long long simple_strtoll(const char *,char **,unsigned int);
-
extern int num_to_str(char *buf, int size,
unsigned long long num, unsigned int width);
@@ -384,52 +235,8 @@ extern int __kernel_text_address(unsigned long addr);
extern int kernel_text_address(unsigned long addr);
extern int func_ptr_is_kernel_text(void *ptr);
-#ifdef CONFIG_SMP
-extern unsigned int sysctl_oops_all_cpu_backtrace;
-#else
-#define sysctl_oops_all_cpu_backtrace 0
-#endif /* CONFIG_SMP */
-
extern void bust_spinlocks(int yes);
-extern int panic_timeout;
-extern unsigned long panic_print;
-extern int panic_on_oops;
-extern int panic_on_unrecovered_nmi;
-extern int panic_on_io_nmi;
-extern int panic_on_warn;
-extern unsigned long panic_on_taint;
-extern bool panic_on_taint_nousertaint;
-extern int sysctl_panic_on_rcu_stall;
-extern int sysctl_max_rcu_stall_to_panic;
-extern int sysctl_panic_on_stackoverflow;
-
-extern bool crash_kexec_post_notifiers;
-/*
- * panic_cpu is used for synchronizing panic() and crash_kexec() execution. It
- * holds a CPU number which is executing panic() currently. A value of
- * PANIC_CPU_INVALID means no CPU has entered panic() or crash_kexec().
- */
-extern atomic_t panic_cpu;
-#define PANIC_CPU_INVALID -1
-
-/*
- * Only to be used by arch init code. If the user over-wrote the default
- * CONFIG_PANIC_TIMEOUT, honor it.
- */
-static inline void set_arch_panic_timeout(int timeout, int arch_default_timeout)
-{
- if (panic_timeout == arch_default_timeout)
- panic_timeout = timeout;
-}
-extern const char *print_tainted(void);
-enum lockdep_ok {
- LOCKDEP_STILL_OK,
- LOCKDEP_NOW_UNRELIABLE
-};
-extern void add_taint(unsigned flag, enum lockdep_ok);
-extern int test_taint(unsigned flag);
-extern unsigned long get_taint(void);
extern int root_mountflags;
extern bool early_boot_irqs_disabled;
@@ -448,36 +255,6 @@ extern enum system_states {
SYSTEM_SUSPEND,
} system_state;
-/* This cannot be an enum because some may be used in assembly source. */
-#define TAINT_PROPRIETARY_MODULE 0
-#define TAINT_FORCED_MODULE 1
-#define TAINT_CPU_OUT_OF_SPEC 2
-#define TAINT_FORCED_RMMOD 3
-#define TAINT_MACHINE_CHECK 4
-#define TAINT_BAD_PAGE 5
-#define TAINT_USER 6
-#define TAINT_DIE 7
-#define TAINT_OVERRIDDEN_ACPI_TABLE 8
-#define TAINT_WARN 9
-#define TAINT_CRAP 10
-#define TAINT_FIRMWARE_WORKAROUND 11
-#define TAINT_OOT_MODULE 12
-#define TAINT_UNSIGNED_MODULE 13
-#define TAINT_SOFTLOCKUP 14
-#define TAINT_LIVEPATCH 15
-#define TAINT_AUX 16
-#define TAINT_RANDSTRUCT 17
-#define TAINT_FLAGS_COUNT 18
-#define TAINT_FLAGS_MAX ((1UL << TAINT_FLAGS_COUNT) - 1)
-
-struct taint_flag {
- char c_true; /* character printed when tainted */
- char c_false; /* character printed when not tainted */
- bool module; /* also show as a per-module taint flag */
-};
-
-extern const struct taint_flag taint_flags[TAINT_FLAGS_COUNT];
-
extern const char hex_asc[];
#define hex_asc_lo(x) hex_asc[((x) & 0x0f)]
#define hex_asc_hi(x) hex_asc[((x) & 0xf0) >> 4]
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 523ffc7bc3a8..4d0c28c2ba12 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -399,7 +399,6 @@ int enable_kprobe(struct kprobe *kp);
void dump_kprobe(struct kprobe *kp);
void *alloc_insn_page(void);
-void free_insn_page(void *page);
int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
char *sym);
diff --git a/include/linux/kstrtox.h b/include/linux/kstrtox.h
new file mode 100644
index 000000000000..529974e22ea7
--- /dev/null
+++ b/include/linux/kstrtox.h
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_KSTRTOX_H
+#define _LINUX_KSTRTOX_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+/* Internal, do not use. */
+int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res);
+int __must_check _kstrtol(const char *s, unsigned int base, long *res);
+
+int __must_check kstrtoull(const char *s, unsigned int base, unsigned long long *res);
+int __must_check kstrtoll(const char *s, unsigned int base, long long *res);
+
+/**
+ * kstrtoul - convert a string to an unsigned long
+ * @s: The start of the string. The string must be null-terminated, and may also
+ * include a single newline before its terminating null. The first character
+ * may also be a plus sign, but not a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ * given as 0, then the base of the string is automatically detected with the
+ * conventional semantics - If it begins with 0x the number will be parsed as a
+ * hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ * parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Preferred over simple_strtoul(). Return code must be checked.
+*/
+static inline int __must_check kstrtoul(const char *s, unsigned int base, unsigned long *res)
+{
+ /*
+ * We want to shortcut function call, but
+ * __builtin_types_compatible_p(unsigned long, unsigned long long) = 0.
+ */
+ if (sizeof(unsigned long) == sizeof(unsigned long long) &&
+ __alignof__(unsigned long) == __alignof__(unsigned long long))
+ return kstrtoull(s, base, (unsigned long long *)res);
+ else
+ return _kstrtoul(s, base, res);
+}
+
+/**
+ * kstrtol - convert a string to a long
+ * @s: The start of the string. The string must be null-terminated, and may also
+ * include a single newline before its terminating null. The first character
+ * may also be a plus sign or a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ * given as 0, then the base of the string is automatically detected with the
+ * conventional semantics - If it begins with 0x the number will be parsed as a
+ * hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ * parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Preferred over simple_strtol(). Return code must be checked.
+ */
+static inline int __must_check kstrtol(const char *s, unsigned int base, long *res)
+{
+ /*
+ * We want to shortcut function call, but
+ * __builtin_types_compatible_p(long, long long) = 0.
+ */
+ if (sizeof(long) == sizeof(long long) &&
+ __alignof__(long) == __alignof__(long long))
+ return kstrtoll(s, base, (long long *)res);
+ else
+ return _kstrtol(s, base, res);
+}
+
+int __must_check kstrtouint(const char *s, unsigned int base, unsigned int *res);
+int __must_check kstrtoint(const char *s, unsigned int base, int *res);
+
+static inline int __must_check kstrtou64(const char *s, unsigned int base, u64 *res)
+{
+ return kstrtoull(s, base, res);
+}
+
+static inline int __must_check kstrtos64(const char *s, unsigned int base, s64 *res)
+{
+ return kstrtoll(s, base, res);
+}
+
+static inline int __must_check kstrtou32(const char *s, unsigned int base, u32 *res)
+{
+ return kstrtouint(s, base, res);
+}
+
+static inline int __must_check kstrtos32(const char *s, unsigned int base, s32 *res)
+{
+ return kstrtoint(s, base, res);
+}
+
+int __must_check kstrtou16(const char *s, unsigned int base, u16 *res);
+int __must_check kstrtos16(const char *s, unsigned int base, s16 *res);
+int __must_check kstrtou8(const char *s, unsigned int base, u8 *res);
+int __must_check kstrtos8(const char *s, unsigned int base, s8 *res);
+int __must_check kstrtobool(const char *s, bool *res);
+
+int __must_check kstrtoull_from_user(const char __user *s, size_t count, unsigned int base, unsigned long long *res);
+int __must_check kstrtoll_from_user(const char __user *s, size_t count, unsigned int base, long long *res);
+int __must_check kstrtoul_from_user(const char __user *s, size_t count, unsigned int base, unsigned long *res);
+int __must_check kstrtol_from_user(const char __user *s, size_t count, unsigned int base, long *res);
+int __must_check kstrtouint_from_user(const char __user *s, size_t count, unsigned int base, unsigned int *res);
+int __must_check kstrtoint_from_user(const char __user *s, size_t count, unsigned int base, int *res);
+int __must_check kstrtou16_from_user(const char __user *s, size_t count, unsigned int base, u16 *res);
+int __must_check kstrtos16_from_user(const char __user *s, size_t count, unsigned int base, s16 *res);
+int __must_check kstrtou8_from_user(const char __user *s, size_t count, unsigned int base, u8 *res);
+int __must_check kstrtos8_from_user(const char __user *s, size_t count, unsigned int base, s8 *res);
+int __must_check kstrtobool_from_user(const char __user *s, size_t count, bool *res);
+
+static inline int __must_check kstrtou64_from_user(const char __user *s, size_t count, unsigned int base, u64 *res)
+{
+ return kstrtoull_from_user(s, count, base, res);
+}
+
+static inline int __must_check kstrtos64_from_user(const char __user *s, size_t count, unsigned int base, s64 *res)
+{
+ return kstrtoll_from_user(s, count, base, res);
+}
+
+static inline int __must_check kstrtou32_from_user(const char __user *s, size_t count, unsigned int base, u32 *res)
+{
+ return kstrtouint_from_user(s, count, base, res);
+}
+
+static inline int __must_check kstrtos32_from_user(const char __user *s, size_t count, unsigned int base, s32 *res)
+{
+ return kstrtoint_from_user(s, count, base, res);
+}
+
+/*
+ * Use kstrto<foo> instead.
+ *
+ * NOTE: simple_strto<foo> does not check for the range overflow and,
+ * depending on the input, may give interesting results.
+ *
+ * Use these functions if and only if you cannot use kstrto<foo>, because
+ * the conversion ends on the first non-digit character, which may be far
+ * beyond the supported range. It might be useful to parse the strings like
+ * 10x50 or 12:21 without altering original string or temporary buffer in use.
+ * Keep in mind above caveat.
+ */
+
+extern unsigned long simple_strtoul(const char *,char **,unsigned int);
+extern long simple_strtol(const char *,char **,unsigned int);
+extern unsigned long long simple_strtoull(const char *,char **,unsigned int);
+extern long long simple_strtoll(const char *,char **,unsigned int);
+
+static inline int strtobool(const char *s, bool *res)
+{
+ return kstrtobool(s, res);
+}
+
+#endif /* _LINUX_KSTRTOX_H */
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 552309342c38..cbf46f56d105 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -30,7 +30,9 @@ extern unsigned long long max_possible_pfn;
* @MEMBLOCK_NONE: no special request
* @MEMBLOCK_HOTPLUG: hotpluggable region
* @MEMBLOCK_MIRROR: mirrored region
- * @MEMBLOCK_NOMAP: don't add to kernel direct mapping
+ * @MEMBLOCK_NOMAP: don't add to kernel direct mapping and treat as
+ * reserved in the memory map; refer to memblock_mark_nomap() description
+ * for further details
*/
enum memblock_flags {
MEMBLOCK_NONE = 0x0, /* No special request */
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 28f32fd00fe9..a7fd2c3ccb77 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -18,18 +18,6 @@ struct vmem_altmap;
#ifdef CONFIG_MEMORY_HOTPLUG
struct page *pfn_to_online_page(unsigned long pfn);
-/*
- * Types for free bootmem stored in page->lru.next. These have to be in
- * some random range in unsigned long space for debugging purposes.
- */
-enum {
- MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE = 12,
- SECTION_INFO = MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE,
- MIX_SECTION_INFO,
- NODE_INFO,
- MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE = NODE_INFO,
-};
-
/* Types for control the zone type of onlined and offlined memory */
enum {
/* Offline the memory. */
@@ -222,17 +210,6 @@ static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat)
#endif /* CONFIG_NUMA */
#endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */
-#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE
-extern void __init register_page_bootmem_info_node(struct pglist_data *pgdat);
-#else
-static inline void register_page_bootmem_info_node(struct pglist_data *pgdat)
-{
-}
-#endif
-extern void put_page_bootmem(struct page *page);
-extern void get_page_bootmem(unsigned long ingo, struct page *page,
- unsigned long type);
-
void get_online_mems(void);
void put_online_mems(void);
@@ -260,10 +237,6 @@ static inline void zone_span_writelock(struct zone *zone) {}
static inline void zone_span_writeunlock(struct zone *zone) {}
static inline void zone_seqlock_init(struct zone *zone) {}
-static inline void register_page_bootmem_info_node(struct pglist_data *pgdat)
-{
-}
-
static inline int try_online_node(int nid)
{
return 0;
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 5f1c74df264d..0aaf91b496e2 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -46,11 +46,8 @@ struct mempolicy {
atomic_t refcnt;
unsigned short mode; /* See MPOL_* above */
unsigned short flags; /* See set_mempolicy() MPOL_F_* above */
- union {
- short preferred_node; /* preferred */
- nodemask_t nodes; /* interleave/bind */
- /* undefined for default */
- } v;
+ nodemask_t nodes; /* interleave/bind/perfer */
+
union {
nodemask_t cpuset_mems_allowed; /* relative to these nodes */
nodemask_t user_nodemask; /* nodemask passed by user */
@@ -150,7 +147,7 @@ extern int huge_node(struct vm_area_struct *vma,
unsigned long addr, gfp_t gfp_flags,
struct mempolicy **mpol, nodemask_t **nodemask);
extern bool init_nodemask_of_mempolicy(nodemask_t *mask);
-extern bool mempolicy_nodemask_intersects(struct task_struct *tsk,
+extern bool mempolicy_in_oom_domain(struct task_struct *tsk,
const nodemask_t *mask);
extern nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy);
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 45a79da89c5f..c0e9d35889e8 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -26,7 +26,7 @@ struct vmem_altmap {
};
/*
- * Specialize ZONE_DEVICE memory into multiple types each having differents
+ * Specialize ZONE_DEVICE memory into multiple types each has a different
* usage.
*
* MEMORY_DEVICE_PRIVATE:
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 4bb4e519e3f5..9b7b7cd3bae9 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -51,6 +51,7 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping,
struct page *newpage, struct page *page);
extern int migrate_page_move_mapping(struct address_space *mapping,
struct page *newpage, struct page *page, int extra_count);
+extern void copy_huge_page(struct page *dst, struct page *src);
#else
static inline void putback_movable_pages(struct list_head *l) {}
@@ -77,6 +78,9 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
return -ENOSYS;
}
+static inline void copy_huge_page(struct page *dst, struct page *src)
+{
+}
#endif /* CONFIG_MIGRATION */
#ifdef CONFIG_COMPACTION
@@ -95,14 +99,9 @@ static inline void __ClearPageMovable(struct page *page)
#endif
#ifdef CONFIG_NUMA_BALANCING
-extern bool pmd_trans_migrating(pmd_t pmd);
extern int migrate_misplaced_page(struct page *page,
struct vm_area_struct *vma, int node);
#else
-static inline bool pmd_trans_migrating(pmd_t pmd)
-{
- return false;
-}
static inline int migrate_misplaced_page(struct page *page,
struct vm_area_struct *vma, int node)
{
@@ -110,24 +109,6 @@ static inline int migrate_misplaced_page(struct page *page,
}
#endif /* CONFIG_NUMA_BALANCING */
-#if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
-extern int migrate_misplaced_transhuge_page(struct mm_struct *mm,
- struct vm_area_struct *vma,
- pmd_t *pmd, pmd_t entry,
- unsigned long address,
- struct page *page, int node);
-#else
-static inline int migrate_misplaced_transhuge_page(struct mm_struct *mm,
- struct vm_area_struct *vma,
- pmd_t *pmd, pmd_t entry,
- unsigned long address,
- struct page *page, int node)
-{
- return -EAGAIN;
-}
-#endif /* CONFIG_NUMA_BALANCING && CONFIG_TRANSPARENT_HUGEPAGE*/
-
-
#ifdef CONFIG_MIGRATION
/*
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b8bc39237dac..788a0b1323d0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -145,7 +145,7 @@ extern int mmap_rnd_compat_bits __read_mostly;
/* This function must be updated when the size of struct page grows above 80
* or reduces below 56. The idea that compiler optimizes out switch()
* statement, and only leaves move/store instructions. Also the compiler can
- * combine write statments if they are both assignments and can be reordered,
+ * combine write statements if they are both assignments and can be reordered,
* this can result in several of the writes here being dropped.
*/
#define mm_zero_struct_page(pp) __mm_zero_struct_page(pp)
@@ -540,7 +540,12 @@ struct vm_fault {
pud_t *pud; /* Pointer to pud entry matching
* the 'address'
*/
- pte_t orig_pte; /* Value of PTE at the time of fault */
+ union {
+ pte_t orig_pte; /* Value of PTE at the time of fault */
+ pmd_t orig_pmd; /* Value of PMD at the time of fault,
+ * used by PMD fault only.
+ */
+ };
struct page *cow_page; /* Page handler may use for COW fault */
struct page *page; /* ->fault handlers should return a
@@ -3067,6 +3072,11 @@ static inline void print_vma_addr(char *prefix, unsigned long rip)
}
#endif
+int vmemmap_remap_free(unsigned long start, unsigned long end,
+ unsigned long reuse);
+int vmemmap_remap_alloc(unsigned long start, unsigned long end,
+ unsigned long reuse, gfp_t gfp_mask);
+
void *sparse_buffer_alloc(unsigned long size);
struct page * __populate_section_memmap(unsigned long pfn,
unsigned long nr_pages, int nid, struct vmem_altmap *altmap);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index d33d97c69da9..52bbd2b7cb46 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -404,7 +404,7 @@ struct mm_struct {
unsigned long mmap_base; /* base of mmap area */
unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */
#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
- /* Base adresses for compatible mmap() */
+ /* Base addresses for compatible mmap() */
unsigned long mmap_compat_base;
unsigned long mmap_compat_legacy_base;
#endif
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 1a6a9eb6d3fa..6692da8d121d 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -41,7 +41,12 @@ struct mmu_interval_notifier;
*
* @MMU_NOTIFY_MIGRATE: used during migrate_vma_collect() invalidate to signal
* a device driver to possibly ignore the invalidation if the
- * migrate_pgmap_owner field matches the driver's device private pgmap owner.
+ * owner field matches the driver's device private pgmap owner.
+ *
+ * @MMU_NOTIFY_EXCLUSIVE: to signal a device driver that the device will no
+ * longer have exclusive access to the page. When sent during creation of an
+ * exclusive range the owner will be initialised to the value provided by the
+ * caller of make_device_exclusive_range(), otherwise the owner will be NULL.
*/
enum mmu_notifier_event {
MMU_NOTIFY_UNMAP = 0,
@@ -51,6 +56,7 @@ enum mmu_notifier_event {
MMU_NOTIFY_SOFT_DIRTY,
MMU_NOTIFY_RELEASE,
MMU_NOTIFY_MIGRATE,
+ MMU_NOTIFY_EXCLUSIVE,
};
#define MMU_NOTIFIER_RANGE_BLOCKABLE (1 << 0)
@@ -269,7 +275,7 @@ struct mmu_notifier_range {
unsigned long end;
unsigned flags;
enum mmu_notifier_event event;
- void *migrate_pgmap_owner;
+ void *owner;
};
static inline int mm_has_notifiers(struct mm_struct *mm)
@@ -521,14 +527,14 @@ static inline void mmu_notifier_range_init(struct mmu_notifier_range *range,
range->flags = flags;
}
-static inline void mmu_notifier_range_init_migrate(
- struct mmu_notifier_range *range, unsigned int flags,
+static inline void mmu_notifier_range_init_owner(
+ struct mmu_notifier_range *range,
+ enum mmu_notifier_event event, unsigned int flags,
struct vm_area_struct *vma, struct mm_struct *mm,
- unsigned long start, unsigned long end, void *pgmap)
+ unsigned long start, unsigned long end, void *owner)
{
- mmu_notifier_range_init(range, MMU_NOTIFY_MIGRATE, flags, vma, mm,
- start, end);
- range->migrate_pgmap_owner = pgmap;
+ mmu_notifier_range_init(range, event, flags, vma, mm, start, end);
+ range->owner = owner;
}
#define ptep_clear_flush_young_notify(__vma, __address, __ptep) \
@@ -655,8 +661,8 @@ static inline void _mmu_notifier_range_init(struct mmu_notifier_range *range,
#define mmu_notifier_range_init(range,event,flags,vma,mm,start,end) \
_mmu_notifier_range_init(range, start, end)
-#define mmu_notifier_range_init_migrate(range, flags, vma, mm, start, end, \
- pgmap) \
+#define mmu_notifier_range_init_owner(range, event, flags, vma, mm, start, \
+ end, owner) \
_mmu_notifier_range_init(range, start, end)
static inline bool
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 265a32e1ff74..fcb535560028 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -114,7 +114,7 @@ static inline bool free_area_empty(struct free_area *area, int migratetype)
struct pglist_data;
/*
- * Add a wild amount of padding here to ensure datas fall into separate
+ * Add a wild amount of padding here to ensure data fall into separate
* cachelines. There are very few zone structures in the machine, so space
* consumption is not a concern here.
*/
@@ -1064,7 +1064,10 @@ extern char numa_zonelist_order[];
#ifndef CONFIG_NUMA
extern struct pglist_data contig_page_data;
-#define NODE_DATA(nid) (&contig_page_data)
+static inline struct pglist_data *NODE_DATA(int nid)
+{
+ return &contig_page_data;
+}
#define NODE_MEM_MAP(nid) mem_map
#else /* CONFIG_NUMA */
@@ -1445,10 +1448,30 @@ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
#endif
#ifndef CONFIG_HAVE_ARCH_PFN_VALID
+/**
+ * pfn_valid - check if there is a valid memory map entry for a PFN
+ * @pfn: the page frame number to check
+ *
+ * Check if there is a valid memory map entry aka struct page for the @pfn.
+ * Note, that availability of the memory map entry does not imply that
+ * there is actual usable memory at that @pfn. The struct page may
+ * represent a hole or an unusable page frame.
+ *
+ * Return: 1 for PFNs that have memory map entries and 0 otherwise
+ */
static inline int pfn_valid(unsigned long pfn)
{
struct mem_section *ms;
+ /*
+ * Ensure the upper PAGE_SHIFT bits are clear in the
+ * pfn. Else it might lead to false positives when
+ * some of the upper bits are set, but the lower bits
+ * match a valid pfn.
+ */
+ if (PHYS_PFN(PFN_PHYS(pfn)) != pfn)
+ return 0;
+
if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
return 0;
ms = __nr_to_section(pfn_to_section_nr(pfn));
diff --git a/include/linux/mpi.h b/include/linux/mpi.h
index 3e5358f4de2f..eb0d1c1db208 100644
--- a/include/linux/mpi.h
+++ b/include/linux/mpi.h
@@ -200,7 +200,7 @@ struct mpi_ec_ctx {
unsigned int nbits; /* Number of bits. */
/* Domain parameters. Note that they may not all be set and if set
- * the MPIs may be flaged as constant.
+ * the MPIs may be flagged as constant.
*/
MPI p; /* Prime specifying the field GF(p). */
MPI a; /* First coefficient of the Weierstrass equation. */
@@ -267,7 +267,7 @@ int mpi_ec_curve_point(MPI_POINT point, struct mpi_ec_ctx *ctx);
/**
* mpi_get_size() - returns max size required to store the number
*
- * @a: A multi precision integer for which we want to allocate a bufer
+ * @a: A multi precision integer for which we want to allocate a buffer
*
* Return: size required to store the number
*/
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 458696550028..5922031ffab6 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -704,6 +704,18 @@ PAGEFLAG_FALSE(DoubleMap)
#endif
/*
+ * Check if a page is currently marked HWPoisoned. Note that this check is
+ * best effort only and inherently racy: there is no way to synchronize with
+ * failing hardware.
+ */
+static inline bool is_page_hwpoison(struct page *page)
+{
+ if (PageHWPoison(page))
+ return true;
+ return PageHuge(page) && PageHWPoison(compound_head(page));
+}
+
+/*
* For pages that are never mapped to userspace (and aren't PageSlab),
* page_type may be used. Because it is initialised to -1, we invert the
* sense of the bit, so __SetPageFoo *clears* the bit used for PageFoo, and
@@ -766,9 +778,19 @@ PAGE_TYPE_OPS(Buddy, buddy)
* relies on this feature is aware that re-onlining the memory block will
* require to re-set the pages PageOffline() and not giving them to the
* buddy via online_page_callback_t.
+ *
+ * There are drivers that mark a page PageOffline() and expect there won't be
+ * any further access to page content. PFN walkers that read content of random
+ * pages should check PageOffline() and synchronize with such drivers using
+ * page_offline_freeze()/page_offline_thaw().
*/
PAGE_TYPE_OPS(Offline, offline)
+extern void page_offline_freeze(void);
+extern void page_offline_thaw(void);
+extern void page_offline_begin(void);
+extern void page_offline_end(void);
+
/*
* Marks pages in use as page tables.
*/
diff --git a/include/linux/panic.h b/include/linux/panic.h
new file mode 100644
index 000000000000..f5844908a089
--- /dev/null
+++ b/include/linux/panic.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_PANIC_H
+#define _LINUX_PANIC_H
+
+#include <linux/compiler_attributes.h>
+#include <linux/types.h>
+
+struct pt_regs;
+
+extern long (*panic_blink)(int state);
+__printf(1, 2)
+void panic(const char *fmt, ...) __noreturn __cold;
+void nmi_panic(struct pt_regs *regs, const char *msg);
+extern void oops_enter(void);
+extern void oops_exit(void);
+extern bool oops_may_print(void);
+
+#ifdef CONFIG_SMP
+extern unsigned int sysctl_oops_all_cpu_backtrace;
+#else
+#define sysctl_oops_all_cpu_backtrace 0
+#endif /* CONFIG_SMP */
+
+extern int panic_timeout;
+extern unsigned long panic_print;
+extern int panic_on_oops;
+extern int panic_on_unrecovered_nmi;
+extern int panic_on_io_nmi;
+extern int panic_on_warn;
+
+extern unsigned long panic_on_taint;
+extern bool panic_on_taint_nousertaint;
+
+extern int sysctl_panic_on_rcu_stall;
+extern int sysctl_max_rcu_stall_to_panic;
+extern int sysctl_panic_on_stackoverflow;
+
+extern bool crash_kexec_post_notifiers;
+
+/*
+ * panic_cpu is used for synchronizing panic() and crash_kexec() execution. It
+ * holds a CPU number which is executing panic() currently. A value of
+ * PANIC_CPU_INVALID means no CPU has entered panic() or crash_kexec().
+ */
+extern atomic_t panic_cpu;
+#define PANIC_CPU_INVALID -1
+
+/*
+ * Only to be used by arch init code. If the user over-wrote the default
+ * CONFIG_PANIC_TIMEOUT, honor it.
+ */
+static inline void set_arch_panic_timeout(int timeout, int arch_default_timeout)
+{
+ if (panic_timeout == arch_default_timeout)
+ panic_timeout = timeout;
+}
+
+/* This cannot be an enum because some may be used in assembly source. */
+#define TAINT_PROPRIETARY_MODULE 0
+#define TAINT_FORCED_MODULE 1
+#define TAINT_CPU_OUT_OF_SPEC 2
+#define TAINT_FORCED_RMMOD 3
+#define TAINT_MACHINE_CHECK 4
+#define TAINT_BAD_PAGE 5
+#define TAINT_USER 6
+#define TAINT_DIE 7
+#define TAINT_OVERRIDDEN_ACPI_TABLE 8
+#define TAINT_WARN 9
+#define TAINT_CRAP 10
+#define TAINT_FIRMWARE_WORKAROUND 11
+#define TAINT_OOT_MODULE 12
+#define TAINT_UNSIGNED_MODULE 13
+#define TAINT_SOFTLOCKUP 14
+#define TAINT_LIVEPATCH 15
+#define TAINT_AUX 16
+#define TAINT_RANDSTRUCT 17
+#define TAINT_FLAGS_COUNT 18
+#define TAINT_FLAGS_MAX ((1UL << TAINT_FLAGS_COUNT) - 1)
+
+struct taint_flag {
+ char c_true; /* character printed when tainted */
+ char c_false; /* character printed when not tainted */
+ bool module; /* also show as a per-module taint flag */
+};
+
+extern const struct taint_flag taint_flags[TAINT_FLAGS_COUNT];
+
+enum lockdep_ok {
+ LOCKDEP_STILL_OK,
+ LOCKDEP_NOW_UNRELIABLE,
+};
+
+extern const char *print_tainted(void);
+extern void add_taint(unsigned flag, enum lockdep_ok);
+extern int test_taint(unsigned flag);
+extern unsigned long get_taint(void);
+
+#endif /* _LINUX_PANIC_H */
diff --git a/include/linux/panic_notifier.h b/include/linux/panic_notifier.h
new file mode 100644
index 000000000000..41e32483d7a7
--- /dev/null
+++ b/include/linux/panic_notifier.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_PANIC_NOTIFIERS_H
+#define _LINUX_PANIC_NOTIFIERS_H
+
+#include <linux/notifier.h>
+#include <linux/types.h>
+
+extern struct atomic_notifier_head panic_notifier_list;
+
+extern bool crash_kexec_post_notifiers;
+
+#endif /* _LINUX_PANIC_NOTIFIERS_H */
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index c32600c9e1ad..e82660f7b9e4 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -29,6 +29,24 @@
#endif
/*
+ * This defines the first usable user address. Platforms
+ * can override its value with custom FIRST_USER_ADDRESS
+ * defined in their respective <asm/pgtable.h>.
+ */
+#ifndef FIRST_USER_ADDRESS
+#define FIRST_USER_ADDRESS 0UL
+#endif
+
+/*
+ * This defines the generic helper for accessing PMD page
+ * table page. Although platforms can still override this
+ * via their respective <asm/pgtable.h>.
+ */
+#ifndef pmd_pgtable
+#define pmd_pgtable(pmd) pmd_page(pmd)
+#endif
+
+/*
* A page table page can be thought of an array like this: pXd_t[PTRS_PER_PxD]
*
* The pXx_index() functions return the index of the entry in the page
@@ -1379,10 +1397,34 @@ static inline int p4d_clear_huge(p4d_t *p4d)
}
#endif /* !__PAGETABLE_P4D_FOLDED */
+#ifndef __PAGETABLE_PUD_FOLDED
int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot);
-int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
int pud_clear_huge(pud_t *pud);
+#else
+static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
+{
+ return 0;
+}
+static inline int pud_clear_huge(pud_t *pud)
+{
+ return 0;
+}
+#endif /* !__PAGETABLE_PUD_FOLDED */
+
+#ifndef __PAGETABLE_PMD_FOLDED
+int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
int pmd_clear_huge(pmd_t *pmd);
+#else
+static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
+{
+ return 0;
+}
+static inline int pmd_clear_huge(pmd_t *pmd)
+{
+ return 0;
+}
+#endif /* !__PAGETABLE_PMD_FOLDED */
+
int p4d_free_pud_page(p4d_t *p4d, unsigned long addr);
int pud_free_pmd_page(pud_t *pud, unsigned long addr);
int pmd_free_pte_page(pmd_t *pmd, unsigned long addr);
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 8d04e7deedc6..83fb86133fe1 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -86,9 +86,6 @@ struct anon_vma_chain {
};
enum ttu_flags {
- TTU_MIGRATION = 0x1, /* migration mode */
- TTU_MUNLOCK = 0x2, /* munlock mode */
-
TTU_SPLIT_HUGE_PMD = 0x4, /* split huge PMD if any */
TTU_IGNORE_MLOCK = 0x8, /* ignore mlock */
TTU_SYNC = 0x10, /* avoid racy checks with PVMW_SYNC */
@@ -98,7 +95,6 @@ enum ttu_flags {
* do a final flush if necessary */
TTU_RMAP_LOCKED = 0x80, /* do not grab rmap lock:
* caller holds it */
- TTU_SPLIT_FREEZE = 0x100, /* freeze pte under splitting thp */
};
#ifdef CONFIG_MMU
@@ -195,7 +191,12 @@ static inline void page_dup_rmap(struct page *page, bool compound)
int page_referenced(struct page *, int is_locked,
struct mem_cgroup *memcg, unsigned long *vm_flags);
-bool try_to_unmap(struct page *, enum ttu_flags flags);
+void try_to_migrate(struct page *page, enum ttu_flags flags);
+void try_to_unmap(struct page *, enum ttu_flags flags);
+
+int make_device_exclusive_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end, struct page **pages,
+ void *arg);
/* Avoid racy checks */
#define PVMW_SYNC (1 << 0)
@@ -240,7 +241,7 @@ int page_mkclean(struct page *);
* called in munlock()/munmap() path to check for other vmas holding
* the page mlocked.
*/
-void try_to_munlock(struct page *);
+void page_mlock(struct page *page);
void remove_migration_ptes(struct page *old, struct page *new, bool locked);
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 723b1fa1177e..dd99569595fd 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -126,8 +126,16 @@ void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num
void seq_put_hex_ll(struct seq_file *m, const char *delimiter,
unsigned long long v, unsigned int width);
+void seq_escape_mem(struct seq_file *m, const char *src, size_t len,
+ unsigned int flags, const char *esc);
+
+static inline void seq_escape_str(struct seq_file *m, const char *src,
+ unsigned int flags, const char *esc)
+{
+ seq_escape_mem(m, src, strlen(src), flags, esc);
+}
+
void seq_escape(struct seq_file *m, const char *s, const char *esc);
-void seq_escape_mem_ascii(struct seq_file *m, const char *src, size_t isz);
void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type,
int rowsize, int groupsize, const void *buf, size_t len,
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index aa77dcd1646f..8e775ce517bb 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -122,21 +122,18 @@ static inline bool shmem_file(struct file *file)
extern bool shmem_charge(struct inode *inode, long pages);
extern void shmem_uncharge(struct inode *inode, long pages);
+#ifdef CONFIG_USERFAULTFD
#ifdef CONFIG_SHMEM
-extern int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
+extern int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
struct vm_area_struct *dst_vma,
unsigned long dst_addr,
unsigned long src_addr,
+ bool zeropage,
struct page **pagep);
-extern int shmem_mfill_zeropage_pte(struct mm_struct *dst_mm,
- pmd_t *dst_pmd,
- struct vm_area_struct *dst_vma,
- unsigned long dst_addr);
-#else
-#define shmem_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \
- src_addr, pagep) ({ BUG(); 0; })
-#define shmem_mfill_zeropage_pte(dst_mm, dst_pmd, dst_vma, \
- dst_addr) ({ BUG(); 0; })
-#endif
+#else /* !CONFIG_SHMEM */
+#define shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr, \
+ src_addr, zeropage, pagep) ({ BUG(); 0; })
+#endif /* CONFIG_SHMEM */
+#endif /* CONFIG_USERFAULTFD */
#endif
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 5160fd45e5ca..3454c7ff0778 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -462,8 +462,6 @@ int __save_altstack(stack_t __user *, unsigned long);
unsafe_put_user((void __user *)t->sas_ss_sp, &__uss->ss_sp, label); \
unsafe_put_user(t->sas_ss_flags, &__uss->ss_flags, label); \
unsafe_put_user(t->sas_ss_size, &__uss->ss_size, label); \
- if (t->sas_ss_flags & SS_AUTODISARM) \
- sas_ss_reset(t); \
} while (0);
#ifdef CONFIG_PROC_FS
diff --git a/include/linux/string.h b/include/linux/string.h
index 9521d8cab18e..b48d2d28e0b1 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -2,7 +2,6 @@
#ifndef _LINUX_STRING_H_
#define _LINUX_STRING_H_
-
#include <linux/compiler.h> /* for inline */
#include <linux/types.h> /* for size_t */
#include <linux/stddef.h> /* for NULL */
@@ -184,12 +183,6 @@ extern char **argv_split(gfp_t gfp, const char *str, int *argcp);
extern void argv_free(char **argv);
extern bool sysfs_streq(const char *s1, const char *s2);
-extern int kstrtobool(const char *s, bool *res);
-static inline int strtobool(const char *s, bool *res)
-{
- return kstrtobool(s, res);
-}
-
int match_string(const char * const *array, size_t n, const char *string);
int __sysfs_match_string(const char * const *array, size_t n, const char *s);
diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h
index fa06dcdc481e..68189c4a2eb1 100644
--- a/include/linux/string_helpers.h
+++ b/include/linux/string_helpers.h
@@ -2,6 +2,7 @@
#ifndef _LINUX_STRING_HELPERS_H_
#define _LINUX_STRING_HELPERS_H_
+#include <linux/bits.h>
#include <linux/ctype.h>
#include <linux/types.h>
@@ -18,13 +19,15 @@ enum string_size_units {
void string_get_size(u64 size, u64 blk_size, enum string_size_units units,
char *buf, int len);
-#define UNESCAPE_SPACE 0x01
-#define UNESCAPE_OCTAL 0x02
-#define UNESCAPE_HEX 0x04
-#define UNESCAPE_SPECIAL 0x08
+#define UNESCAPE_SPACE BIT(0)
+#define UNESCAPE_OCTAL BIT(1)
+#define UNESCAPE_HEX BIT(2)
+#define UNESCAPE_SPECIAL BIT(3)
#define UNESCAPE_ANY \
(UNESCAPE_SPACE | UNESCAPE_OCTAL | UNESCAPE_HEX | UNESCAPE_SPECIAL)
+#define UNESCAPE_ALL_MASK GENMASK(3, 0)
+
int string_unescape(char *src, char *dst, size_t size, unsigned int flags);
static inline int string_unescape_inplace(char *buf, unsigned int flags)
@@ -42,22 +45,24 @@ static inline int string_unescape_any_inplace(char *buf)
return string_unescape_any(buf, buf, 0);
}
-#define ESCAPE_SPACE 0x01
-#define ESCAPE_SPECIAL 0x02
-#define ESCAPE_NULL 0x04
-#define ESCAPE_OCTAL 0x08
+#define ESCAPE_SPACE BIT(0)
+#define ESCAPE_SPECIAL BIT(1)
+#define ESCAPE_NULL BIT(2)
+#define ESCAPE_OCTAL BIT(3)
#define ESCAPE_ANY \
(ESCAPE_SPACE | ESCAPE_OCTAL | ESCAPE_SPECIAL | ESCAPE_NULL)
-#define ESCAPE_NP 0x10
+#define ESCAPE_NP BIT(4)
#define ESCAPE_ANY_NP (ESCAPE_ANY | ESCAPE_NP)
-#define ESCAPE_HEX 0x20
+#define ESCAPE_HEX BIT(5)
+#define ESCAPE_NA BIT(6)
+#define ESCAPE_NAP BIT(7)
+#define ESCAPE_APPEND BIT(8)
+
+#define ESCAPE_ALL_MASK GENMASK(8, 0)
int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz,
unsigned int flags, const char *only);
-int string_escape_mem_ascii(const char *src, size_t isz, char *dst,
- size_t osz);
-
static inline int string_escape_mem_any_np(const char *src, size_t isz,
char *dst, size_t osz, const char *only)
{
diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index d0965e2997b0..b134b2b3371c 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -14,6 +14,7 @@
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/atomic.h>
+#include <linux/kstrtox.h>
#include <linux/proc_fs.h>
/*
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 49b1dd2c100b..6f5a43251593 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -62,12 +62,17 @@ static inline int current_is_kswapd(void)
* migrate part of a process memory to device memory.
*
* When a page is migrated from CPU to device, we set the CPU page table entry
- * to a special SWP_DEVICE_* entry.
+ * to a special SWP_DEVICE_{READ|WRITE} entry.
+ *
+ * When a page is mapped by the device for exclusive access we set the CPU page
+ * table entries to special SWP_DEVICE_EXCLUSIVE_* entries.
*/
#ifdef CONFIG_DEVICE_PRIVATE
-#define SWP_DEVICE_NUM 2
+#define SWP_DEVICE_NUM 4
#define SWP_DEVICE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM)
#define SWP_DEVICE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+1)
+#define SWP_DEVICE_EXCLUSIVE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+2)
+#define SWP_DEVICE_EXCLUSIVE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+3)
#else
#define SWP_DEVICE_NUM 0
#endif
@@ -537,7 +542,11 @@ static inline void put_swap_device(struct swap_info_struct *si)
{
}
-#define swap_address_space(entry) (NULL)
+static inline struct address_space *swap_address_space(swp_entry_t entry)
+{
+ return NULL;
+}
+
#define get_nr_swap_pages() 0L
#define total_swap_pages 0L
#define total_swapcache_pages() 0UL
@@ -560,8 +569,8 @@ static inline void show_swap_cache_info(void)
{
}
-#define free_swap_and_cache(e) ({(is_migration_entry(e) || is_device_private_entry(e));})
-#define swapcache_prepare(e) ({(is_migration_entry(e) || is_device_private_entry(e));})
+/* used to sanity check ptes in zap_pte_range when CONFIG_SWAP=0 */
+#define free_swap_and_cache(e) is_pfn_swap_entry(e)
static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask)
{
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 5907205c712c..d356ab4047f7 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -107,10 +107,14 @@ static inline void *swp_to_radix_entry(swp_entry_t entry)
}
#if IS_ENABLED(CONFIG_DEVICE_PRIVATE)
-static inline swp_entry_t make_device_private_entry(struct page *page, bool write)
+static inline swp_entry_t make_readable_device_private_entry(pgoff_t offset)
{
- return swp_entry(write ? SWP_DEVICE_WRITE : SWP_DEVICE_READ,
- page_to_pfn(page));
+ return swp_entry(SWP_DEVICE_READ, offset);
+}
+
+static inline swp_entry_t make_writable_device_private_entry(pgoff_t offset)
+{
+ return swp_entry(SWP_DEVICE_WRITE, offset);
}
static inline bool is_device_private_entry(swp_entry_t entry)
@@ -119,33 +123,40 @@ static inline bool is_device_private_entry(swp_entry_t entry)
return type == SWP_DEVICE_READ || type == SWP_DEVICE_WRITE;
}
-static inline void make_device_private_entry_read(swp_entry_t *entry)
+static inline bool is_writable_device_private_entry(swp_entry_t entry)
{
- *entry = swp_entry(SWP_DEVICE_READ, swp_offset(*entry));
+ return unlikely(swp_type(entry) == SWP_DEVICE_WRITE);
}
-static inline bool is_write_device_private_entry(swp_entry_t entry)
+static inline swp_entry_t make_readable_device_exclusive_entry(pgoff_t offset)
{
- return unlikely(swp_type(entry) == SWP_DEVICE_WRITE);
+ return swp_entry(SWP_DEVICE_EXCLUSIVE_READ, offset);
}
-static inline unsigned long device_private_entry_to_pfn(swp_entry_t entry)
+static inline swp_entry_t make_writable_device_exclusive_entry(pgoff_t offset)
{
- return swp_offset(entry);
+ return swp_entry(SWP_DEVICE_EXCLUSIVE_WRITE, offset);
+}
+
+static inline bool is_device_exclusive_entry(swp_entry_t entry)
+{
+ return swp_type(entry) == SWP_DEVICE_EXCLUSIVE_READ ||
+ swp_type(entry) == SWP_DEVICE_EXCLUSIVE_WRITE;
}
-static inline struct page *device_private_entry_to_page(swp_entry_t entry)
+static inline bool is_writable_device_exclusive_entry(swp_entry_t entry)
{
- return pfn_to_page(swp_offset(entry));
+ return unlikely(swp_type(entry) == SWP_DEVICE_EXCLUSIVE_WRITE);
}
#else /* CONFIG_DEVICE_PRIVATE */
-static inline swp_entry_t make_device_private_entry(struct page *page, bool write)
+static inline swp_entry_t make_readable_device_private_entry(pgoff_t offset)
{
return swp_entry(0, 0);
}
-static inline void make_device_private_entry_read(swp_entry_t *entry)
+static inline swp_entry_t make_writable_device_private_entry(pgoff_t offset)
{
+ return swp_entry(0, 0);
}
static inline bool is_device_private_entry(swp_entry_t entry)
@@ -153,61 +164,52 @@ static inline bool is_device_private_entry(swp_entry_t entry)
return false;
}
-static inline bool is_write_device_private_entry(swp_entry_t entry)
+static inline bool is_writable_device_private_entry(swp_entry_t entry)
{
return false;
}
-static inline unsigned long device_private_entry_to_pfn(swp_entry_t entry)
+static inline swp_entry_t make_readable_device_exclusive_entry(pgoff_t offset)
{
- return 0;
+ return swp_entry(0, 0);
}
-static inline struct page *device_private_entry_to_page(swp_entry_t entry)
+static inline swp_entry_t make_writable_device_exclusive_entry(pgoff_t offset)
{
- return NULL;
+ return swp_entry(0, 0);
}
-#endif /* CONFIG_DEVICE_PRIVATE */
-#ifdef CONFIG_MIGRATION
-static inline swp_entry_t make_migration_entry(struct page *page, int write)
+static inline bool is_device_exclusive_entry(swp_entry_t entry)
{
- BUG_ON(!PageLocked(compound_head(page)));
+ return false;
+}
- return swp_entry(write ? SWP_MIGRATION_WRITE : SWP_MIGRATION_READ,
- page_to_pfn(page));
+static inline bool is_writable_device_exclusive_entry(swp_entry_t entry)
+{
+ return false;
}
+#endif /* CONFIG_DEVICE_PRIVATE */
+#ifdef CONFIG_MIGRATION
static inline int is_migration_entry(swp_entry_t entry)
{
return unlikely(swp_type(entry) == SWP_MIGRATION_READ ||
swp_type(entry) == SWP_MIGRATION_WRITE);
}
-static inline int is_write_migration_entry(swp_entry_t entry)
+static inline int is_writable_migration_entry(swp_entry_t entry)
{
return unlikely(swp_type(entry) == SWP_MIGRATION_WRITE);
}
-static inline unsigned long migration_entry_to_pfn(swp_entry_t entry)
+static inline swp_entry_t make_readable_migration_entry(pgoff_t offset)
{
- return swp_offset(entry);
+ return swp_entry(SWP_MIGRATION_READ, offset);
}
-static inline struct page *migration_entry_to_page(swp_entry_t entry)
+static inline swp_entry_t make_writable_migration_entry(pgoff_t offset)
{
- struct page *p = pfn_to_page(swp_offset(entry));
- /*
- * Any use of migration entries may only occur while the
- * corresponding page is locked
- */
- BUG_ON(!PageLocked(compound_head(p)));
- return p;
-}
-
-static inline void make_migration_entry_read(swp_entry_t *entry)
-{
- *entry = swp_entry(SWP_MIGRATION_READ, swp_offset(*entry));
+ return swp_entry(SWP_MIGRATION_WRITE, offset);
}
extern void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
@@ -217,37 +219,58 @@ extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
extern void migration_entry_wait_huge(struct vm_area_struct *vma,
struct mm_struct *mm, pte_t *pte);
#else
-
-#define make_migration_entry(page, write) swp_entry(0, 0)
-static inline int is_migration_entry(swp_entry_t swp)
+static inline swp_entry_t make_readable_migration_entry(pgoff_t offset)
{
- return 0;
+ return swp_entry(0, 0);
}
-static inline unsigned long migration_entry_to_pfn(swp_entry_t entry)
+static inline swp_entry_t make_writable_migration_entry(pgoff_t offset)
{
- return 0;
+ return swp_entry(0, 0);
}
-static inline struct page *migration_entry_to_page(swp_entry_t entry)
+static inline int is_migration_entry(swp_entry_t swp)
{
- return NULL;
+ return 0;
}
-static inline void make_migration_entry_read(swp_entry_t *entryp) { }
static inline void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
spinlock_t *ptl) { }
static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
unsigned long address) { }
static inline void migration_entry_wait_huge(struct vm_area_struct *vma,
struct mm_struct *mm, pte_t *pte) { }
-static inline int is_write_migration_entry(swp_entry_t entry)
+static inline int is_writable_migration_entry(swp_entry_t entry)
{
return 0;
}
#endif
+static inline struct page *pfn_swap_entry_to_page(swp_entry_t entry)
+{
+ struct page *p = pfn_to_page(swp_offset(entry));
+
+ /*
+ * Any use of migration entries may only occur while the
+ * corresponding page is locked
+ */
+ BUG_ON(is_migration_entry(entry) && !PageLocked(p));
+
+ return p;
+}
+
+/*
+ * A pfn swap entry is a special type of swap entry that always has a pfn stored
+ * in the swap offset. They are used to represent unaddressable device memory
+ * and to restrict access to a page undergoing migration.
+ */
+static inline bool is_pfn_swap_entry(swp_entry_t entry)
+{
+ return is_migration_entry(entry) || is_device_private_entry(entry) ||
+ is_device_exclusive_entry(entry);
+}
+
struct page_vma_mapped_walk;
#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
@@ -265,6 +288,8 @@ static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd)
if (pmd_swp_soft_dirty(pmd))
pmd = pmd_swp_clear_soft_dirty(pmd);
+ if (pmd_swp_uffd_wp(pmd))
+ pmd = pmd_swp_clear_uffd_wp(pmd);
arch_entry = __pmd_to_swp_entry(pmd);
return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry));
}
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 157762db9d4b..0999f6317978 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -9,6 +9,7 @@
#define _LINUX_THREAD_INFO_H
#include <linux/types.h>
+#include <linux/limits.h>
#include <linux/bug.h>
#include <linux/restart_block.h>
#include <linux/errno.h>
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 794d1538b8ba..331d2ccf0bcc 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -53,6 +53,11 @@ enum mcopy_atomic_mode {
MCOPY_ATOMIC_CONTINUE,
};
+extern int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
+ struct vm_area_struct *dst_vma,
+ unsigned long dst_addr, struct page *page,
+ bool newly_allocated, bool wp_copy);
+
extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
unsigned long src_start, unsigned long len,
bool *mmap_changing, __u64 mode);
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index bfaaf0b6fa76..1dabd6f22486 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -104,6 +104,21 @@ static inline bool arch_vmap_pmd_supported(pgprot_t prot)
}
#endif
+#ifndef arch_vmap_pte_range_map_size
+static inline unsigned long arch_vmap_pte_range_map_size(unsigned long addr, unsigned long end,
+ u64 pfn, unsigned int max_page_shift)
+{
+ return PAGE_SIZE;
+}
+#endif
+
+#ifndef arch_vmap_pte_supported_shift
+static inline int arch_vmap_pte_supported_shift(unsigned long size)
+{
+ return PAGE_SHIFT;
+}
+#endif
+
/*
* Highlevel APIs for driver use
*/
diff --git a/include/linux/zbud.h b/include/linux/zbud.h
deleted file mode 100644
index b1eaf6e31735..000000000000
--- a/include/linux/zbud.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ZBUD_H_
-#define _ZBUD_H_
-
-#include <linux/types.h>
-
-struct zbud_pool;
-
-struct zbud_ops {
- int (*evict)(struct zbud_pool *pool, unsigned long handle);
-};
-
-struct zbud_pool *zbud_create_pool(gfp_t gfp, const struct zbud_ops *ops);
-void zbud_destroy_pool(struct zbud_pool *pool);
-int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
- unsigned long *handle);
-void zbud_free(struct zbud_pool *pool, unsigned long handle);
-int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries);
-void *zbud_map(struct zbud_pool *pool, unsigned long handle);
-void zbud_unmap(struct zbud_pool *pool, unsigned long handle);
-u64 zbud_get_pool_size(struct zbud_pool *pool);
-
-#endif /* _ZBUD_H_ */
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index 00d1180527d8..88faf2400ec2 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -423,47 +423,6 @@ TRACE_EVENT(mm_vmscan_lru_shrink_active,
show_reclaim_flags(__entry->reclaim_flags))
);
-TRACE_EVENT(mm_vmscan_inactive_list_is_low,
-
- TP_PROTO(int nid, int reclaim_idx,
- unsigned long total_inactive, unsigned long inactive,
- unsigned long total_active, unsigned long active,
- unsigned long ratio, int file),
-
- TP_ARGS(nid, reclaim_idx, total_inactive, inactive, total_active, active, ratio, file),
-
- TP_STRUCT__entry(
- __field(int, nid)
- __field(int, reclaim_idx)
- __field(unsigned long, total_inactive)
- __field(unsigned long, inactive)
- __field(unsigned long, total_active)
- __field(unsigned long, active)
- __field(unsigned long, ratio)
- __field(int, reclaim_flags)
- ),
-
- TP_fast_assign(
- __entry->nid = nid;
- __entry->reclaim_idx = reclaim_idx;
- __entry->total_inactive = total_inactive;
- __entry->inactive = inactive;
- __entry->total_active = total_active;
- __entry->active = active;
- __entry->ratio = ratio;
- __entry->reclaim_flags = trace_reclaim_flags(file) &
- RECLAIM_WB_LRU;
- ),
-
- TP_printk("nid=%d reclaim_idx=%d total_inactive=%ld inactive=%ld total_active=%ld active=%ld ratio=%ld flags=%s",
- __entry->nid,
- __entry->reclaim_idx,
- __entry->total_inactive, __entry->inactive,
- __entry->total_active, __entry->active,
- __entry->ratio,
- show_reclaim_flags(__entry->reclaim_flags))
-);
-
TRACE_EVENT(mm_vmscan_node_reclaim_begin,
TP_PROTO(int nid, int order, gfp_t gfp_flags),
diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h
index f94f65d429be..1567a3294c3d 100644
--- a/include/uapi/asm-generic/mman-common.h
+++ b/include/uapi/asm-generic/mman-common.h
@@ -72,6 +72,9 @@
#define MADV_COLD 20 /* deactivate these pages */
#define MADV_PAGEOUT 21 /* reclaim these pages */
+#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */
+#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */
+
/* compatibility flags */
#define MAP_FILE 0
diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h
index 4832fd0b5642..19a00bc7fe86 100644
--- a/include/uapi/linux/mempolicy.h
+++ b/include/uapi/linux/mempolicy.h
@@ -60,7 +60,6 @@ enum {
* are never OR'ed into the mode in mempolicy API arguments.
*/
#define MPOL_F_SHARED (1 << 0) /* identify shared policies */
-#define MPOL_F_LOCAL (1 << 1) /* preferred local allocation */
#define MPOL_F_MOF (1 << 3) /* this policy wants migrate on fault */
#define MPOL_F_MORON (1 << 4) /* Migrate On protnone Reference On Node */
diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index 650480f41f1d..05b31d60acf6 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -31,7 +31,8 @@
UFFD_FEATURE_MISSING_SHMEM | \
UFFD_FEATURE_SIGBUS | \
UFFD_FEATURE_THREAD_ID | \
- UFFD_FEATURE_MINOR_HUGETLBFS)
+ UFFD_FEATURE_MINOR_HUGETLBFS | \
+ UFFD_FEATURE_MINOR_SHMEM)
#define UFFD_API_IOCTLS \
((__u64)1 << _UFFDIO_REGISTER | \
(__u64)1 << _UFFDIO_UNREGISTER | \
@@ -185,6 +186,9 @@ struct uffdio_api {
* UFFD_FEATURE_MINOR_HUGETLBFS indicates that minor faults
* can be intercepted (via REGISTER_MODE_MINOR) for
* hugetlbfs-backed pages.
+ *
+ * UFFD_FEATURE_MINOR_SHMEM indicates the same support as
+ * UFFD_FEATURE_MINOR_HUGETLBFS, but for shmem-backed pages instead.
*/
#define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0)
#define UFFD_FEATURE_EVENT_FORK (1<<1)
@@ -196,6 +200,7 @@ struct uffdio_api {
#define UFFD_FEATURE_SIGBUS (1<<7)
#define UFFD_FEATURE_THREAD_ID (1<<8)
#define UFFD_FEATURE_MINOR_HUGETLBFS (1<<9)
+#define UFFD_FEATURE_MINOR_SHMEM (1<<10)
__u64 features;
__u64 ioctls;