summaryrefslogtreecommitdiffstats
path: root/include/linux
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/buffer_head.h4
-rw-r--r--include/linux/cache.h6
-rw-r--r--include/linux/cgroup.h1
-rw-r--r--include/linux/compaction.h104
-rw-r--r--include/linux/dma-map-ops.h61
-rw-r--r--include/linux/dma-mapping.h5
-rw-r--r--include/linux/fault-inject.h9
-rw-r--r--include/linux/frontswap.h2
-rw-r--r--include/linux/fs.h7
-rw-r--r--include/linux/gfp.h15
-rw-r--r--include/linux/hugetlb.h33
-rw-r--r--include/linux/iio/iio.h2
-rw-r--r--include/linux/kasan.h2
-rw-r--r--include/linux/maple_tree.h130
-rw-r--r--include/linux/memblock.h1
-rw-r--r--include/linux/memcontrol.h24
-rw-r--r--include/linux/memory_hotplug.h8
-rw-r--r--include/linux/migrate.h20
-rw-r--r--include/linux/mm.h226
-rw-r--r--include/linux/mm_inline.h14
-rw-r--r--include/linux/mm_types.h23
-rw-r--r--include/linux/mmdebug.h14
-rw-r--r--include/linux/mmzone.h51
-rw-r--r--include/linux/page-isolation.h23
-rw-r--r--include/linux/pagemap.h6
-rw-r--r--include/linux/pagevec.h67
-rw-r--r--include/linux/pgtable.h176
-rw-r--r--include/linux/ramfs.h1
-rw-r--r--include/linux/scatterlist.h84
-rw-r--r--include/linux/sched.h3
-rw-r--r--include/linux/slab.h14
-rw-r--r--include/linux/sunrpc/svc.h2
-rw-r--r--include/linux/suspend.h9
-rw-r--r--include/linux/swap.h29
-rw-r--r--include/linux/swapops.h17
-rw-r--r--include/linux/syscalls.h5
-rw-r--r--include/linux/userfaultfd_k.h6
-rw-r--r--include/linux/zpool.h20
38 files changed, 587 insertions, 637 deletions
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 1520793c72da..c794ea7096ba 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -263,7 +263,7 @@ extern int buffer_heads_over_limit;
void block_invalidate_folio(struct folio *folio, size_t offset, size_t length);
int block_write_full_page(struct page *page, get_block_t *get_block,
struct writeback_control *wbc);
-int __block_write_full_page(struct inode *inode, struct page *page,
+int __block_write_full_folio(struct inode *inode, struct folio *folio,
get_block_t *get_block, struct writeback_control *wbc,
bh_end_io_t *handler);
int block_read_full_folio(struct folio *, get_block_t *);
@@ -278,7 +278,7 @@ int block_write_end(struct file *, struct address_space *,
int generic_write_end(struct file *, struct address_space *,
loff_t, unsigned, unsigned,
struct page *, void *);
-void page_zero_new_buffers(struct page *page, unsigned from, unsigned to);
+void folio_zero_new_buffers(struct folio *folio, size_t from, size_t to);
void clean_page_buffers(struct page *page);
int cont_write_begin(struct file *, struct address_space *, loff_t,
unsigned, struct page **, void **,
diff --git a/include/linux/cache.h b/include/linux/cache.h
index 5da1bbd96154..9900d20b76c2 100644
--- a/include/linux/cache.h
+++ b/include/linux/cache.h
@@ -98,4 +98,10 @@ struct cacheline_padding {
#define CACHELINE_PADDING(name)
#endif
+#ifdef ARCH_DMA_MINALIGN
+#define ARCH_HAS_DMA_MINALIGN
+#else
+#define ARCH_DMA_MINALIGN __alignof__(unsigned long long)
+#endif
+
#endif /* __LINUX_CACHE_H */
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 1261a47932a6..b307013b9c6c 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -691,7 +691,6 @@ static inline void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen)
*/
void cgroup_rstat_updated(struct cgroup *cgrp, int cpu);
void cgroup_rstat_flush(struct cgroup *cgrp);
-void cgroup_rstat_flush_atomic(struct cgroup *cgrp);
void cgroup_rstat_flush_hold(struct cgroup *cgrp);
void cgroup_rstat_flush_release(void);
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index a6e512cfb670..e94776496049 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -89,89 +89,17 @@ extern enum compact_result try_to_compact_pages(gfp_t gfp_mask,
const struct alloc_context *ac, enum compact_priority prio,
struct page **page);
extern void reset_isolation_suitable(pg_data_t *pgdat);
-extern enum compact_result compaction_suitable(struct zone *zone, int order,
- unsigned int alloc_flags, int highest_zoneidx);
+extern bool compaction_suitable(struct zone *zone, int order,
+ int highest_zoneidx);
extern void compaction_defer_reset(struct zone *zone, int order,
bool alloc_success);
-/* Compaction has made some progress and retrying makes sense */
-static inline bool compaction_made_progress(enum compact_result result)
-{
- /*
- * Even though this might sound confusing this in fact tells us
- * that the compaction successfully isolated and migrated some
- * pageblocks.
- */
- if (result == COMPACT_SUCCESS)
- return true;
-
- return false;
-}
-
-/* Compaction has failed and it doesn't make much sense to keep retrying. */
-static inline bool compaction_failed(enum compact_result result)
-{
- /* All zones were scanned completely and still not result. */
- if (result == COMPACT_COMPLETE)
- return true;
-
- return false;
-}
-
-/* Compaction needs reclaim to be performed first, so it can continue. */
-static inline bool compaction_needs_reclaim(enum compact_result result)
-{
- /*
- * Compaction backed off due to watermark checks for order-0
- * so the regular reclaim has to try harder and reclaim something.
- */
- if (result == COMPACT_SKIPPED)
- return true;
-
- return false;
-}
-
-/*
- * Compaction has backed off for some reason after doing some work or none
- * at all. It might be throttling or lock contention. Retrying might be still
- * worthwhile, but with a higher priority if allowed.
- */
-static inline bool compaction_withdrawn(enum compact_result result)
-{
- /*
- * If compaction is deferred for high-order allocations, it is
- * because sync compaction recently failed. If this is the case
- * and the caller requested a THP allocation, we do not want
- * to heavily disrupt the system, so we fail the allocation
- * instead of entering direct reclaim.
- */
- if (result == COMPACT_DEFERRED)
- return true;
-
- /*
- * If compaction in async mode encounters contention or blocks higher
- * priority task we back off early rather than cause stalls.
- */
- if (result == COMPACT_CONTENDED)
- return true;
-
- /*
- * Page scanners have met but we haven't scanned full zones so this
- * is a back off in fact.
- */
- if (result == COMPACT_PARTIAL_SKIPPED)
- return true;
-
- return false;
-}
-
-
bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
int alloc_flags);
-extern void kcompactd_run(int nid);
-extern void kcompactd_stop(int nid);
+extern void __meminit kcompactd_run(int nid);
+extern void __meminit kcompactd_stop(int nid);
extern void wakeup_kcompactd(pg_data_t *pgdat, int order, int highest_zoneidx);
#else
@@ -179,32 +107,12 @@ static inline void reset_isolation_suitable(pg_data_t *pgdat)
{
}
-static inline enum compact_result compaction_suitable(struct zone *zone, int order,
- int alloc_flags, int highest_zoneidx)
-{
- return COMPACT_SKIPPED;
-}
-
-static inline bool compaction_made_progress(enum compact_result result)
-{
- return false;
-}
-
-static inline bool compaction_failed(enum compact_result result)
-{
- return false;
-}
-
-static inline bool compaction_needs_reclaim(enum compact_result result)
+static inline bool compaction_suitable(struct zone *zone, int order,
+ int highest_zoneidx)
{
return false;
}
-static inline bool compaction_withdrawn(enum compact_result result)
-{
- return true;
-}
-
static inline void kcompactd_run(int nid)
{
}
diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h
index 31f114f486c4..9bf19b5bf755 100644
--- a/include/linux/dma-map-ops.h
+++ b/include/linux/dma-map-ops.h
@@ -8,6 +8,7 @@
#include <linux/dma-mapping.h>
#include <linux/pgtable.h>
+#include <linux/slab.h>
struct cma;
@@ -277,6 +278,66 @@ static inline bool dev_is_dma_coherent(struct device *dev)
}
#endif /* CONFIG_ARCH_HAS_DMA_COHERENCE_H */
+/*
+ * Check whether potential kmalloc() buffers are safe for non-coherent DMA.
+ */
+static inline bool dma_kmalloc_safe(struct device *dev,
+ enum dma_data_direction dir)
+{
+ /*
+ * If DMA bouncing of kmalloc() buffers is disabled, the kmalloc()
+ * caches have already been aligned to a DMA-safe size.
+ */
+ if (!IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC))
+ return true;
+
+ /*
+ * kmalloc() buffers are DMA-safe irrespective of size if the device
+ * is coherent or the direction is DMA_TO_DEVICE (non-desctructive
+ * cache maintenance and benign cache line evictions).
+ */
+ if (dev_is_dma_coherent(dev) || dir == DMA_TO_DEVICE)
+ return true;
+
+ return false;
+}
+
+/*
+ * Check whether the given size, assuming it is for a kmalloc()'ed buffer, is
+ * sufficiently aligned for non-coherent DMA.
+ */
+static inline bool dma_kmalloc_size_aligned(size_t size)
+{
+ /*
+ * Larger kmalloc() sizes are guaranteed to be aligned to
+ * ARCH_DMA_MINALIGN.
+ */
+ if (size >= 2 * ARCH_DMA_MINALIGN ||
+ IS_ALIGNED(kmalloc_size_roundup(size), dma_get_cache_alignment()))
+ return true;
+
+ return false;
+}
+
+/*
+ * Check whether the given object size may have originated from a kmalloc()
+ * buffer with a slab alignment below the DMA-safe alignment and needs
+ * bouncing for non-coherent DMA. The pointer alignment is not considered and
+ * in-structure DMA-safe offsets are the responsibility of the caller. Such
+ * code should use the static ARCH_DMA_MINALIGN for compiler annotations.
+ *
+ * The heuristics can have false positives, bouncing unnecessarily, though the
+ * buffers would be small. False negatives are theoretically possible if, for
+ * example, multiple small kmalloc() buffers are coalesced into a larger
+ * buffer that passes the alignment check. There are no such known constructs
+ * in the kernel.
+ */
+static inline bool dma_kmalloc_needs_bounce(struct device *dev, size_t size,
+ enum dma_data_direction dir)
+{
+ return !dma_kmalloc_safe(dev, dir) && !dma_kmalloc_size_aligned(size);
+}
+
void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t gfp, unsigned long attrs);
void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 0ee20b764000..e13050eb9777 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -2,6 +2,7 @@
#ifndef _LINUX_DMA_MAPPING_H
#define _LINUX_DMA_MAPPING_H
+#include <linux/cache.h>
#include <linux/sizes.h>
#include <linux/string.h>
#include <linux/device.h>
@@ -543,13 +544,15 @@ static inline int dma_set_min_align_mask(struct device *dev,
return 0;
}
+#ifndef dma_get_cache_alignment
static inline int dma_get_cache_alignment(void)
{
-#ifdef ARCH_DMA_MINALIGN
+#ifdef ARCH_HAS_DMA_MINALIGN
return ARCH_DMA_MINALIGN;
#endif
return 1;
}
+#endif
static inline void *dmam_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp)
diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index 481abf530b3c..6d5edef09d45 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -93,6 +93,15 @@ struct kmem_cache;
bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order);
+#ifdef CONFIG_FAIL_PAGE_ALLOC
+bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order);
+#else
+static inline bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
+{
+ return false;
+}
+#endif /* CONFIG_FAIL_PAGE_ALLOC */
+
int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
#ifdef CONFIG_FAILSLAB
extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags);
diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h
index a631bac12220..eaa0ac5f9003 100644
--- a/include/linux/frontswap.h
+++ b/include/linux/frontswap.h
@@ -10,7 +10,7 @@
struct frontswap_ops {
void (*init)(unsigned); /* this swap type was just swapon'ed */
int (*store)(unsigned, pgoff_t, struct page *); /* store a page */
- int (*load)(unsigned, pgoff_t, struct page *); /* load a page */
+ int (*load)(unsigned, pgoff_t, struct page *, bool *); /* load a page */
void (*invalidate_page)(unsigned, pgoff_t); /* page no longer needed */
void (*invalidate_area)(unsigned); /* swap type just swapoff'ed */
};
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ed5b32dc2625..122b218b66c9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2763,6 +2763,8 @@ extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *);
ssize_t generic_perform_write(struct kiocb *, struct iov_iter *);
+ssize_t direct_write_fallback(struct kiocb *iocb, struct iov_iter *iter,
+ ssize_t direct_written, ssize_t buffered_written);
ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
rwf_t flags);
@@ -2860,11 +2862,6 @@ static inline void inode_dio_end(struct inode *inode)
wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
}
-/*
- * Warn about a page cache invalidation failure diring a direct I/O write.
- */
-void dio_warn_stale_pagecache(struct file *filp);
-
extern void inode_set_flags(struct inode *inode, unsigned int flags,
unsigned int mask);
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index ed8cb537c6a7..665f06675c83 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -338,19 +338,12 @@ extern gfp_t gfp_allowed_mask;
/* Returns true if the gfp_mask allows use of ALLOC_NO_WATERMARK */
bool gfp_pfmemalloc_allowed(gfp_t gfp_mask);
-extern void pm_restrict_gfp_mask(void);
-extern void pm_restore_gfp_mask(void);
-
-extern gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma);
-
-#ifdef CONFIG_PM_SLEEP
-extern bool pm_suspended_storage(void);
-#else
-static inline bool pm_suspended_storage(void)
+static inline bool gfp_has_io_fs(gfp_t gfp)
{
- return false;
+ return (gfp & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS);
}
-#endif /* CONFIG_PM_SLEEP */
+
+extern gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma);
#ifdef CONFIG_CONTIG_ALLOC
/* The below functions must be run on a range from a single zone. */
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 6d041aa9f0fe..ca3c8e10f24a 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -133,9 +133,8 @@ int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *,
struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
unsigned long address, unsigned int flags);
long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
- struct page **, struct vm_area_struct **,
- unsigned long *, unsigned long *, long, unsigned int,
- int *);
+ struct page **, unsigned long *, unsigned long *,
+ long, unsigned int, int *);
void unmap_hugepage_range(struct vm_area_struct *,
unsigned long, unsigned long, struct page *,
zap_flags_t);
@@ -306,9 +305,8 @@ static inline struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
static inline long follow_hugetlb_page(struct mm_struct *mm,
struct vm_area_struct *vma, struct page **pages,
- struct vm_area_struct **vmas, unsigned long *position,
- unsigned long *nr_pages, long i, unsigned int flags,
- int *nonblocking)
+ unsigned long *position, unsigned long *nr_pages,
+ long i, unsigned int flags, int *nonblocking)
{
BUG();
return 0;
@@ -757,26 +755,12 @@ static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio
return folio->_hugetlb_subpool;
}
-/*
- * hugetlb page subpool pointer located in hpage[2].hugetlb_subpool
- */
-static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage)
-{
- return hugetlb_folio_subpool(page_folio(hpage));
-}
-
static inline void hugetlb_set_folio_subpool(struct folio *folio,
struct hugepage_subpool *subpool)
{
folio->_hugetlb_subpool = subpool;
}
-static inline void hugetlb_set_page_subpool(struct page *hpage,
- struct hugepage_subpool *subpool)
-{
- hugetlb_set_folio_subpool(page_folio(hpage), subpool);
-}
-
static inline struct hstate *hstate_file(struct file *f)
{
return hstate_inode(file_inode(f));
@@ -1031,11 +1015,6 @@ static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio
return NULL;
}
-static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage)
-{
- return NULL;
-}
-
static inline int isolate_or_dissolve_huge_page(struct page *page,
struct list_head *list)
{
@@ -1200,7 +1179,11 @@ static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
+#ifdef CONFIG_MMU
+ return ptep_get(ptep);
+#else
return *ptep;
+#endif
}
static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index 81413cd3a3e7..d28a5e8097e4 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -722,7 +722,7 @@ static inline void *iio_device_get_drvdata(const struct iio_dev *indio_dev)
* must not share cachelines with the rest of the structure, thus making
* them safe for use with non-coherent DMA.
*/
-#define IIO_DMA_MINALIGN ARCH_KMALLOC_MINALIGN
+#define IIO_DMA_MINALIGN ARCH_DMA_MINALIGN
struct iio_dev *iio_device_alloc(struct device *parent, int sizeof_priv);
/* The information at the returned address is guaranteed to be cacheline aligned */
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index f7ef70661ce2..819b6bc8ac08 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -343,7 +343,7 @@ static inline void *kasan_reset_tag(const void *addr)
* @is_write: whether the bad access is a write or a read
* @ip: instruction pointer for the accessibility check or the bad access itself
*/
-bool kasan_report(unsigned long addr, size_t size,
+bool kasan_report(const void *addr, size_t size,
bool is_write, unsigned long ip);
#else /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */
diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
index 1fadb5f5978b..295548cca8b3 100644
--- a/include/linux/maple_tree.h
+++ b/include/linux/maple_tree.h
@@ -455,7 +455,9 @@ void *mas_erase(struct ma_state *mas);
int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp);
void mas_store_prealloc(struct ma_state *mas, void *entry);
void *mas_find(struct ma_state *mas, unsigned long max);
+void *mas_find_range(struct ma_state *mas, unsigned long max);
void *mas_find_rev(struct ma_state *mas, unsigned long min);
+void *mas_find_range_rev(struct ma_state *mas, unsigned long max);
int mas_preallocate(struct ma_state *mas, gfp_t gfp);
bool mas_is_err(struct ma_state *mas);
@@ -466,10 +468,18 @@ void mas_destroy(struct ma_state *mas);
int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries);
void *mas_prev(struct ma_state *mas, unsigned long min);
+void *mas_prev_range(struct ma_state *mas, unsigned long max);
void *mas_next(struct ma_state *mas, unsigned long max);
+void *mas_next_range(struct ma_state *mas, unsigned long max);
int mas_empty_area(struct ma_state *mas, unsigned long min, unsigned long max,
unsigned long size);
+/*
+ * This finds an empty area from the highest address to the lowest.
+ * AKA "Topdown" version,
+ */
+int mas_empty_area_rev(struct ma_state *mas, unsigned long min,
+ unsigned long max, unsigned long size);
static inline void mas_init(struct ma_state *mas, struct maple_tree *tree,
unsigned long addr)
@@ -482,23 +492,17 @@ static inline void mas_init(struct ma_state *mas, struct maple_tree *tree,
}
/* Checks if a mas has not found anything */
-static inline bool mas_is_none(struct ma_state *mas)
+static inline bool mas_is_none(const struct ma_state *mas)
{
return mas->node == MAS_NONE;
}
/* Checks if a mas has been paused */
-static inline bool mas_is_paused(struct ma_state *mas)
+static inline bool mas_is_paused(const struct ma_state *mas)
{
return mas->node == MAS_PAUSE;
}
-/*
- * This finds an empty area from the highest address to the lowest.
- * AKA "Topdown" version,
- */
-int mas_empty_area_rev(struct ma_state *mas, unsigned long min,
- unsigned long max, unsigned long size);
/**
* mas_reset() - Reset a Maple Tree operation state.
* @mas: Maple Tree operation state.
@@ -528,7 +532,6 @@ static inline void mas_reset(struct ma_state *mas)
#define mas_for_each(__mas, __entry, __max) \
while (((__entry) = mas_find((__mas), (__max))) != NULL)
-
/**
* mas_set_range() - Set up Maple Tree operation state for a different index.
* @mas: Maple Tree operation state.
@@ -616,7 +619,7 @@ static inline void mt_clear_in_rcu(struct maple_tree *mt)
return;
if (mt_external_lock(mt)) {
- BUG_ON(!mt_lock_is_held(mt));
+ WARN_ON(!mt_lock_is_held(mt));
mt->ma_flags &= ~MT_FLAGS_USE_RCU;
} else {
mtree_lock(mt);
@@ -635,7 +638,7 @@ static inline void mt_set_in_rcu(struct maple_tree *mt)
return;
if (mt_external_lock(mt)) {
- BUG_ON(!mt_lock_is_held(mt));
+ WARN_ON(!mt_lock_is_held(mt));
mt->ma_flags |= MT_FLAGS_USE_RCU;
} else {
mtree_lock(mt);
@@ -670,10 +673,17 @@ void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max);
#ifdef CONFIG_DEBUG_MAPLE_TREE
+enum mt_dump_format {
+ mt_dump_dec,
+ mt_dump_hex,
+};
+
extern atomic_t maple_tree_tests_run;
extern atomic_t maple_tree_tests_passed;
-void mt_dump(const struct maple_tree *mt);
+void mt_dump(const struct maple_tree *mt, enum mt_dump_format format);
+void mas_dump(const struct ma_state *mas);
+void mas_wr_dump(const struct ma_wr_state *wr_mas);
void mt_validate(struct maple_tree *mt);
void mt_cache_shrink(void);
#define MT_BUG_ON(__tree, __x) do { \
@@ -681,7 +691,23 @@ void mt_cache_shrink(void);
if (__x) { \
pr_info("BUG at %s:%d (%u)\n", \
__func__, __LINE__, __x); \
- mt_dump(__tree); \
+ mt_dump(__tree, mt_dump_hex); \
+ pr_info("Pass: %u Run:%u\n", \
+ atomic_read(&maple_tree_tests_passed), \
+ atomic_read(&maple_tree_tests_run)); \
+ dump_stack(); \
+ } else { \
+ atomic_inc(&maple_tree_tests_passed); \
+ } \
+} while (0)
+
+#define MAS_BUG_ON(__mas, __x) do { \
+ atomic_inc(&maple_tree_tests_run); \
+ if (__x) { \
+ pr_info("BUG at %s:%d (%u)\n", \
+ __func__, __LINE__, __x); \
+ mas_dump(__mas); \
+ mt_dump((__mas)->tree, mt_dump_hex); \
pr_info("Pass: %u Run:%u\n", \
atomic_read(&maple_tree_tests_passed), \
atomic_read(&maple_tree_tests_run)); \
@@ -690,8 +716,84 @@ void mt_cache_shrink(void);
atomic_inc(&maple_tree_tests_passed); \
} \
} while (0)
+
+#define MAS_WR_BUG_ON(__wrmas, __x) do { \
+ atomic_inc(&maple_tree_tests_run); \
+ if (__x) { \
+ pr_info("BUG at %s:%d (%u)\n", \
+ __func__, __LINE__, __x); \
+ mas_wr_dump(__wrmas); \
+ mas_dump((__wrmas)->mas); \
+ mt_dump((__wrmas)->mas->tree, mt_dump_hex); \
+ pr_info("Pass: %u Run:%u\n", \
+ atomic_read(&maple_tree_tests_passed), \
+ atomic_read(&maple_tree_tests_run)); \
+ dump_stack(); \
+ } else { \
+ atomic_inc(&maple_tree_tests_passed); \
+ } \
+} while (0)
+
+#define MT_WARN_ON(__tree, __x) ({ \
+ int ret = !!(__x); \
+ atomic_inc(&maple_tree_tests_run); \
+ if (ret) { \
+ pr_info("WARN at %s:%d (%u)\n", \
+ __func__, __LINE__, __x); \
+ mt_dump(__tree, mt_dump_hex); \
+ pr_info("Pass: %u Run:%u\n", \
+ atomic_read(&maple_tree_tests_passed), \
+ atomic_read(&maple_tree_tests_run)); \
+ dump_stack(); \
+ } else { \
+ atomic_inc(&maple_tree_tests_passed); \
+ } \
+ unlikely(ret); \
+})
+
+#define MAS_WARN_ON(__mas, __x) ({ \
+ int ret = !!(__x); \
+ atomic_inc(&maple_tree_tests_run); \
+ if (ret) { \
+ pr_info("WARN at %s:%d (%u)\n", \
+ __func__, __LINE__, __x); \
+ mas_dump(__mas); \
+ mt_dump((__mas)->tree, mt_dump_hex); \
+ pr_info("Pass: %u Run:%u\n", \
+ atomic_read(&maple_tree_tests_passed), \
+ atomic_read(&maple_tree_tests_run)); \
+ dump_stack(); \
+ } else { \
+ atomic_inc(&maple_tree_tests_passed); \
+ } \
+ unlikely(ret); \
+})
+
+#define MAS_WR_WARN_ON(__wrmas, __x) ({ \
+ int ret = !!(__x); \
+ atomic_inc(&maple_tree_tests_run); \
+ if (ret) { \
+ pr_info("WARN at %s:%d (%u)\n", \
+ __func__, __LINE__, __x); \
+ mas_wr_dump(__wrmas); \
+ mas_dump((__wrmas)->mas); \
+ mt_dump((__wrmas)->mas->tree, mt_dump_hex); \
+ pr_info("Pass: %u Run:%u\n", \
+ atomic_read(&maple_tree_tests_passed), \
+ atomic_read(&maple_tree_tests_run)); \
+ dump_stack(); \
+ } else { \
+ atomic_inc(&maple_tree_tests_passed); \
+ } \
+ unlikely(ret); \
+})
#else
-#define MT_BUG_ON(__tree, __x) BUG_ON(__x)
+#define MT_BUG_ON(__tree, __x) BUG_ON(__x)
+#define MAS_BUG_ON(__mas, __x) BUG_ON(__x)
+#define MAS_WR_BUG_ON(__mas, __x) BUG_ON(__x)
+#define MT_WARN_ON(__tree, __x) WARN_ON(__x)
+#define MAS_WARN_ON(__mas, __x) WARN_ON(__x)
+#define MAS_WR_WARN_ON(__mas, __x) WARN_ON(__x)
#endif /* CONFIG_DEBUG_MAPLE_TREE */
#endif /*_LINUX_MAPLE_TREE_H */
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index f82ee3fac1cd..f71ff9f0ec81 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -128,7 +128,6 @@ int memblock_clear_nomap(phys_addr_t base, phys_addr_t size);
void memblock_free_all(void);
void memblock_free(void *ptr, size_t size);
-void reset_node_managed_pages(pg_data_t *pgdat);
void reset_all_zones_managed_pages(void);
/* Low level functions */
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 222d7370134c..5818af8eca5a 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -419,7 +419,7 @@ static inline struct obj_cgroup *__folio_objcg(struct folio *folio)
*
* - the folio lock
* - LRU isolation
- * - lock_page_memcg()
+ * - folio_memcg_lock()
* - exclusive reference
* - mem_cgroup_trylock_pages()
*
@@ -820,8 +820,8 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
struct mem_cgroup *,
struct mem_cgroup_reclaim_cookie *);
void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
-int mem_cgroup_scan_tasks(struct mem_cgroup *,
- int (*)(struct task_struct *, void *), void *);
+void mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
+ int (*)(struct task_struct *, void *), void *arg);
static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
{
@@ -949,8 +949,6 @@ void mem_cgroup_print_oom_group(struct mem_cgroup *memcg);
void folio_memcg_lock(struct folio *folio);
void folio_memcg_unlock(struct folio *folio);
-void lock_page_memcg(struct page *page);
-void unlock_page_memcg(struct page *page);
void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val);
@@ -1038,7 +1036,6 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
}
void mem_cgroup_flush_stats(void);
-void mem_cgroup_flush_stats_atomic(void);
void mem_cgroup_flush_stats_ratelimited(void);
void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
@@ -1367,10 +1364,9 @@ static inline void mem_cgroup_iter_break(struct mem_cgroup *root,
{
}
-static inline int mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
+static inline void mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
int (*fn)(struct task_struct *, void *), void *arg)
{
- return 0;
}
static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
@@ -1439,14 +1435,6 @@ mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg)
{
}
-static inline void lock_page_memcg(struct page *page)
-{
-}
-
-static inline void unlock_page_memcg(struct page *page)
-{
-}
-
static inline void folio_memcg_lock(struct folio *folio)
{
}
@@ -1537,10 +1525,6 @@ static inline void mem_cgroup_flush_stats(void)
{
}
-static inline void mem_cgroup_flush_stats_atomic(void)
-{
-}
-
static inline void mem_cgroup_flush_stats_ratelimited(void)
{
}
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 9fcbf5706595..013c69753c91 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -326,9 +326,6 @@ static inline int remove_memory(u64 start, u64 size)
static inline void __remove_memory(u64 start, u64 size) {}
#endif /* CONFIG_MEMORY_HOTREMOVE */
-extern void set_zone_contiguous(struct zone *zone);
-extern void clear_zone_contiguous(struct zone *zone);
-
#ifdef CONFIG_MEMORY_HOTPLUG
extern void __ref free_area_init_core_hotplug(struct pglist_data *pgdat);
extern int __add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags);
@@ -347,9 +344,8 @@ extern void remove_pfn_range_from_zone(struct zone *zone,
extern int sparse_add_section(int nid, unsigned long pfn,
unsigned long nr_pages, struct vmem_altmap *altmap,
struct dev_pagemap *pgmap);
-extern void sparse_remove_section(struct mem_section *ms,
- unsigned long pfn, unsigned long nr_pages,
- unsigned long map_offset, struct vmem_altmap *altmap);
+extern void sparse_remove_section(unsigned long pfn, unsigned long nr_pages,
+ struct vmem_altmap *altmap);
extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
unsigned long pnum);
extern struct zone *zone_for_pfn_range(int online_type, int nid,
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 6241a1596a75..711dd9412561 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -7,8 +7,8 @@
#include <linux/migrate_mode.h>
#include <linux/hugetlb.h>
-typedef struct page *new_page_t(struct page *page, unsigned long private);
-typedef void free_page_t(struct page *page, unsigned long private);
+typedef struct folio *new_folio_t(struct folio *folio, unsigned long private);
+typedef void free_folio_t(struct folio *folio, unsigned long private);
struct migration_target_control;
@@ -67,16 +67,16 @@ int migrate_folio_extra(struct address_space *mapping, struct folio *dst,
struct folio *src, enum migrate_mode mode, int extra_count);
int migrate_folio(struct address_space *mapping, struct folio *dst,
struct folio *src, enum migrate_mode mode);
-int migrate_pages(struct list_head *l, new_page_t new, free_page_t free,
+int migrate_pages(struct list_head *l, new_folio_t new, free_folio_t free,
unsigned long private, enum migrate_mode mode, int reason,
unsigned int *ret_succeeded);
-struct page *alloc_migration_target(struct page *page, unsigned long private);
+struct folio *alloc_migration_target(struct folio *src, unsigned long private);
bool isolate_movable_page(struct page *page, isolate_mode_t mode);
int migrate_huge_page_move_mapping(struct address_space *mapping,
struct folio *dst, struct folio *src);
-void migration_entry_wait_on_locked(swp_entry_t entry, pte_t *ptep,
- spinlock_t *ptl);
+void migration_entry_wait_on_locked(swp_entry_t entry, spinlock_t *ptl)
+ __releases(ptl);
void folio_migrate_flags(struct folio *newfolio, struct folio *folio);
void folio_migrate_copy(struct folio *newfolio, struct folio *folio);
int folio_migrate_mapping(struct address_space *mapping,
@@ -85,11 +85,11 @@ int folio_migrate_mapping(struct address_space *mapping,
#else
static inline void putback_movable_pages(struct list_head *l) {}
-static inline int migrate_pages(struct list_head *l, new_page_t new,
- free_page_t free, unsigned long private, enum migrate_mode mode,
- int reason, unsigned int *ret_succeeded)
+static inline int migrate_pages(struct list_head *l, new_folio_t new,
+ free_folio_t free, unsigned long private,
+ enum migrate_mode mode, int reason, unsigned int *ret_succeeded)
{ return -ENOSYS; }
-static inline struct page *alloc_migration_target(struct page *page,
+static inline struct folio *alloc_migration_target(struct folio *src,
unsigned long private)
{ return NULL; }
static inline bool isolate_movable_page(struct page *page, isolate_mode_t mode)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index fec149585985..ae866bc9bad6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -725,7 +725,6 @@ struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
#else /* CONFIG_PER_VMA_LOCK */
-static inline void vma_init_lock(struct vm_area_struct *vma) {}
static inline bool vma_start_read(struct vm_area_struct *vma)
{ return false; }
static inline void vma_end_read(struct vm_area_struct *vma) {}
@@ -866,11 +865,24 @@ static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi)
return mas_find(&vmi->mas, ULONG_MAX);
}
+static inline
+struct vm_area_struct *vma_iter_next_range(struct vma_iterator *vmi)
+{
+ return mas_next_range(&vmi->mas, ULONG_MAX);
+}
+
+
static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi)
{
return mas_prev(&vmi->mas, 0);
}
+static inline
+struct vm_area_struct *vma_iter_prev_range(struct vma_iterator *vmi)
+{
+ return mas_prev_range(&vmi->mas, 0);
+}
+
static inline unsigned long vma_iter_addr(struct vma_iterator *vmi)
{
return vmi->mas.index;
@@ -1208,17 +1220,6 @@ enum compound_dtor_id {
#endif
NR_COMPOUND_DTORS,
};
-extern compound_page_dtor * const compound_page_dtors[NR_COMPOUND_DTORS];
-
-static inline void set_compound_page_dtor(struct page *page,
- enum compound_dtor_id compound_dtor)
-{
- struct folio *folio = (struct folio *)page;
-
- VM_BUG_ON_PAGE(compound_dtor >= NR_COMPOUND_DTORS, page);
- VM_BUG_ON_PAGE(!PageHead(page), page);
- folio->_folio_dtor = compound_dtor;
-}
static inline void folio_set_compound_dtor(struct folio *folio,
enum compound_dtor_id compound_dtor)
@@ -1229,16 +1230,6 @@ static inline void folio_set_compound_dtor(struct folio *folio,
void destroy_large_folio(struct folio *folio);
-static inline void set_compound_order(struct page *page, unsigned int order)
-{
- struct folio *folio = (struct folio *)page;
-
- folio->_folio_order = order;
-#ifdef CONFIG_64BIT
- folio->_folio_nr_pages = 1U << order;
-#endif
-}
-
/* Returns the number of bytes in this potentially compound page. */
static inline unsigned long page_size(struct page *page)
{
@@ -1932,39 +1923,35 @@ static inline bool is_zero_folio(const struct folio *folio)
return is_zero_page(&folio->page);
}
-/* MIGRATE_CMA and ZONE_MOVABLE do not allow pin pages */
+/* MIGRATE_CMA and ZONE_MOVABLE do not allow pin folios */
#ifdef CONFIG_MIGRATION
-static inline bool is_longterm_pinnable_page(struct page *page)
+static inline bool folio_is_longterm_pinnable(struct folio *folio)
{
#ifdef CONFIG_CMA
- int mt = get_pageblock_migratetype(page);
+ int mt = folio_migratetype(folio);
if (mt == MIGRATE_CMA || mt == MIGRATE_ISOLATE)
return false;
#endif
/* The zero page can be "pinned" but gets special handling. */
- if (is_zero_page(page))
+ if (is_zero_folio(folio))
return true;
/* Coherent device memory must always allow eviction. */
- if (is_device_coherent_page(page))
+ if (folio_is_device_coherent(folio))
return false;
- /* Otherwise, non-movable zone pages can be pinned. */
- return !is_zone_movable_page(page);
+ /* Otherwise, non-movable zone folios can be pinned. */
+ return !folio_is_zone_movable(folio);
+
}
#else
-static inline bool is_longterm_pinnable_page(struct page *page)
+static inline bool folio_is_longterm_pinnable(struct folio *folio)
{
return true;
}
#endif
-static inline bool folio_is_longterm_pinnable(struct folio *folio)
-{
- return is_longterm_pinnable_page(&folio->page);
-}
-
static inline void set_page_zone(struct page *page, enum zone_type zone)
{
page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT);
@@ -2375,6 +2362,9 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
unmap_mapping_range(mapping, holebegin, holelen, 0);
}
+static inline struct vm_area_struct *vma_lookup(struct mm_struct *mm,
+ unsigned long addr);
+
extern int access_process_vm(struct task_struct *tsk, unsigned long addr,
void *buf, int len, unsigned int gup_flags);
extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
@@ -2383,19 +2373,42 @@ extern int __access_remote_vm(struct mm_struct *mm, unsigned long addr,
void *buf, int len, unsigned int gup_flags);
long get_user_pages_remote(struct mm_struct *mm,
- unsigned long start, unsigned long nr_pages,
- unsigned int gup_flags, struct page **pages,
- struct vm_area_struct **vmas, int *locked);
+ unsigned long start, unsigned long nr_pages,
+ unsigned int gup_flags, struct page **pages,
+ int *locked);
long pin_user_pages_remote(struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
- struct vm_area_struct **vmas, int *locked);
+ int *locked);
+
+static inline struct page *get_user_page_vma_remote(struct mm_struct *mm,
+ unsigned long addr,
+ int gup_flags,
+ struct vm_area_struct **vmap)
+{
+ struct page *page;
+ struct vm_area_struct *vma;
+ int got = get_user_pages_remote(mm, addr, 1, gup_flags, &page, NULL);
+
+ if (got < 0)
+ return ERR_PTR(got);
+ if (got == 0)
+ return NULL;
+
+ vma = vma_lookup(mm, addr);
+ if (WARN_ON_ONCE(!vma)) {
+ put_page(page);
+ return ERR_PTR(-EINVAL);
+ }
+
+ *vmap = vma;
+ return page;
+}
+
long get_user_pages(unsigned long start, unsigned long nr_pages,
- unsigned int gup_flags, struct page **pages,
- struct vm_area_struct **vmas);
+ unsigned int gup_flags, struct page **pages);
long pin_user_pages(unsigned long start, unsigned long nr_pages,
- unsigned int gup_flags, struct page **pages,
- struct vm_area_struct **vmas);
+ unsigned int gup_flags, struct page **pages);
long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
struct page **pages, unsigned int gup_flags);
long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
@@ -2445,6 +2458,7 @@ extern unsigned long move_page_tables(struct vm_area_struct *vma,
#define MM_CP_UFFD_WP_ALL (MM_CP_UFFD_WP | \
MM_CP_UFFD_WP_RESOLVE)
+bool vma_needs_dirty_tracking(struct vm_area_struct *vma);
int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);
static inline bool vma_wants_manual_pte_write_upgrade(struct vm_area_struct *vma)
{
@@ -2810,14 +2824,25 @@ static inline void pgtable_pte_page_dtor(struct page *page)
dec_lruvec_page_state(page, NR_PAGETABLE);
}
-#define pte_offset_map_lock(mm, pmd, address, ptlp) \
-({ \
- spinlock_t *__ptl = pte_lockptr(mm, pmd); \
- pte_t *__pte = pte_offset_map(pmd, address); \
- *(ptlp) = __ptl; \
- spin_lock(__ptl); \
- __pte; \
-})
+pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp);
+static inline pte_t *pte_offset_map(pmd_t *pmd, unsigned long addr)
+{
+ return __pte_offset_map(pmd, addr, NULL);
+}
+
+pte_t *__pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr, spinlock_t **ptlp);
+static inline pte_t *pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr, spinlock_t **ptlp)
+{
+ pte_t *pte;
+
+ __cond_lock(*ptlp, pte = __pte_offset_map_lock(mm, pmd, addr, ptlp));
+ return pte;
+}
+
+pte_t *pte_offset_map_nolock(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr, spinlock_t **ptlp);
#define pte_unmap_unlock(pte, ptl) do { \
spin_unlock(ptl); \
@@ -2938,7 +2963,8 @@ extern unsigned long free_reserved_area(void *start, void *end,
extern void adjust_managed_page_count(struct page *page, long count);
-extern void reserve_bootmem_region(phys_addr_t start, phys_addr_t end);
+extern void reserve_bootmem_region(phys_addr_t start,
+ phys_addr_t end, int nid);
/* Free the reserved page into the buddy system, so it gets managed. */
static inline void free_reserved_page(struct page *page)
@@ -3017,12 +3043,6 @@ extern int __meminit early_pfn_to_nid(unsigned long pfn);
#endif
extern void set_dma_reserve(unsigned long new_dma_reserve);
-extern void memmap_init_range(unsigned long, int, unsigned long,
- unsigned long, unsigned long, enum meminit_context,
- struct vmem_altmap *, int migratetype);
-extern void setup_per_zone_wmarks(void);
-extern void calculate_min_free_kbytes(void);
-extern int __meminit init_per_zone_wmark_min(void);
extern void mem_init(void);
extern void __init mmap_init(void);
@@ -3043,11 +3063,6 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...);
extern void setup_per_cpu_pageset(void);
-/* page_alloc.c */
-extern int min_free_kbytes;
-extern int watermark_boost_factor;
-extern int watermark_scale_factor;
-
/* nommu.c */
extern atomic_long_t mmap_pages_allocated;
extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);
@@ -3494,9 +3509,58 @@ static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages)
if (debug_pagealloc_enabled_static())
__kernel_map_pages(page, numpages, 0);
}
+
+extern unsigned int _debug_guardpage_minorder;
+DECLARE_STATIC_KEY_FALSE(_debug_guardpage_enabled);
+
+static inline unsigned int debug_guardpage_minorder(void)
+{
+ return _debug_guardpage_minorder;
+}
+
+static inline bool debug_guardpage_enabled(void)
+{
+ return static_branch_unlikely(&_debug_guardpage_enabled);
+}
+
+static inline bool page_is_guard(struct page *page)
+{
+ if (!debug_guardpage_enabled())
+ return false;
+
+ return PageGuard(page);
+}
+
+bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order,
+ int migratetype);
+static inline bool set_page_guard(struct zone *zone, struct page *page,
+ unsigned int order, int migratetype)
+{
+ if (!debug_guardpage_enabled())
+ return false;
+ return __set_page_guard(zone, page, order, migratetype);
+}
+
+void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order,
+ int migratetype);
+static inline void clear_page_guard(struct zone *zone, struct page *page,
+ unsigned int order, int migratetype)
+{
+ if (!debug_guardpage_enabled())
+ return;
+ __clear_page_guard(zone, page, order, migratetype);
+}
+
#else /* CONFIG_DEBUG_PAGEALLOC */
static inline void debug_pagealloc_map_pages(struct page *page, int numpages) {}
static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages) {}
+static inline unsigned int debug_guardpage_minorder(void) { return 0; }
+static inline bool debug_guardpage_enabled(void) { return false; }
+static inline bool page_is_guard(struct page *page) { return false; }
+static inline bool set_page_guard(struct zone *zone, struct page *page,
+ unsigned int order, int migratetype) { return false; }
+static inline void clear_page_guard(struct zone *zone, struct page *page,
+ unsigned int order, int migratetype) {}
#endif /* CONFIG_DEBUG_PAGEALLOC */
#ifdef __HAVE_ARCH_GATE_AREA
@@ -3609,6 +3673,10 @@ extern void shake_page(struct page *p);
extern atomic_long_t num_poisoned_pages __read_mostly;
extern int soft_offline_page(unsigned long pfn, int flags);
#ifdef CONFIG_MEMORY_FAILURE
+/*
+ * Sysfs entries for memory failure handling statistics.
+ */
+extern const struct attribute_group memory_failure_attr_group;
extern void memory_failure_queue(unsigned long pfn, int flags);
extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
bool *migratable_cleared);
@@ -3701,11 +3769,6 @@ enum mf_action_page_type {
MF_MSG_UNKNOWN,
};
-/*
- * Sysfs entries for memory failure handling statistics.
- */
-extern const struct attribute_group memory_failure_attr_group;
-
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)
extern void clear_huge_page(struct page *page,
unsigned long addr_hint,
@@ -3735,33 +3798,6 @@ static inline bool vma_is_special_huge(const struct vm_area_struct *vma)
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
-#ifdef CONFIG_DEBUG_PAGEALLOC
-extern unsigned int _debug_guardpage_minorder;
-DECLARE_STATIC_KEY_FALSE(_debug_guardpage_enabled);
-
-static inline unsigned int debug_guardpage_minorder(void)
-{
- return _debug_guardpage_minorder;
-}
-
-static inline bool debug_guardpage_enabled(void)
-{
- return static_branch_unlikely(&_debug_guardpage_enabled);
-}
-
-static inline bool page_is_guard(struct page *page)
-{
- if (!debug_guardpage_enabled())
- return false;
-
- return PageGuard(page);
-}
-#else
-static inline unsigned int debug_guardpage_minorder(void) { return 0; }
-static inline bool debug_guardpage_enabled(void) { return false; }
-static inline bool page_is_guard(struct page *page) { return false; }
-#endif /* CONFIG_DEBUG_PAGEALLOC */
-
#if MAX_NUMNODES > 1
void __init setup_nr_node_ids(void);
#else
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 0e1d239a882c..21d6c72bcc71 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -323,12 +323,6 @@ void lruvec_add_folio(struct lruvec *lruvec, struct folio *folio)
list_add(&folio->lru, &lruvec->lists[lru]);
}
-static __always_inline void add_page_to_lru_list(struct page *page,
- struct lruvec *lruvec)
-{
- lruvec_add_folio(lruvec, page_folio(page));
-}
-
static __always_inline
void lruvec_add_folio_tail(struct lruvec *lruvec, struct folio *folio)
{
@@ -357,12 +351,6 @@ void lruvec_del_folio(struct lruvec *lruvec, struct folio *folio)
-folio_nr_pages(folio));
}
-static __always_inline void del_page_from_lru_list(struct page *page,
- struct lruvec *lruvec)
-{
- lruvec_del_folio(lruvec, page_folio(page));
-}
-
#ifdef CONFIG_ANON_VMA_NAME
/*
* mmap_lock should be read-locked when calling anon_vma_name(). Caller should
@@ -555,7 +543,7 @@ pte_install_uffd_wp_if_needed(struct vm_area_struct *vma, unsigned long addr,
bool arm_uffd_pte = false;
/* The current status of the pte should be "cleared" before calling */
- WARN_ON_ONCE(!pte_none(*pte));
+ WARN_ON_ONCE(!pte_none(ptep_get(pte)));
/*
* NOTE: userfaultfd_wp_unpopulated() doesn't need this whole
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 306a3d1a0fa6..de10fc797c8e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -583,6 +583,21 @@ struct mm_cid {
struct kioctx_table;
struct mm_struct {
struct {
+ /*
+ * Fields which are often written to are placed in a separate
+ * cache line.
+ */
+ struct {
+ /**
+ * @mm_count: The number of references to &struct
+ * mm_struct (@mm_users count as 1).
+ *
+ * Use mmgrab()/mmdrop() to modify. When this drops to
+ * 0, the &struct mm_struct is freed.
+ */
+ atomic_t mm_count;
+ } ____cacheline_aligned_in_smp;
+
struct maple_tree mm_mt;
#ifdef CONFIG_MMU
unsigned long (*get_unmapped_area) (struct file *filp,
@@ -620,14 +635,6 @@ struct mm_struct {
*/
atomic_t mm_users;
- /**
- * @mm_count: The number of references to &struct mm_struct
- * (@mm_users count as 1).
- *
- * Use mmgrab()/mmdrop() to modify. When this drops to 0, the
- * &struct mm_struct is freed.
- */
- atomic_t mm_count;
#ifdef CONFIG_SCHED_MM_CID
/**
* @pcpu_cid: Per-cpu current cid.
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index b8728d11c949..7c3e7b0b0e8f 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -8,10 +8,12 @@
struct page;
struct vm_area_struct;
struct mm_struct;
+struct vma_iterator;
void dump_page(struct page *page, const char *reason);
void dump_vma(const struct vm_area_struct *vma);
void dump_mm(const struct mm_struct *mm);
+void vma_iter_dump_tree(const struct vma_iterator *vmi);
#ifdef CONFIG_DEBUG_VM
#define VM_BUG_ON(cond) BUG_ON(cond)
@@ -74,6 +76,17 @@ void dump_mm(const struct mm_struct *mm);
} \
unlikely(__ret_warn_once); \
})
+#define VM_WARN_ON_ONCE_MM(cond, mm) ({ \
+ static bool __section(".data.once") __warned; \
+ int __ret_warn_once = !!(cond); \
+ \
+ if (unlikely(__ret_warn_once && !__warned)) { \
+ dump_mm(mm); \
+ __warned = true; \
+ WARN_ON(1); \
+ } \
+ unlikely(__ret_warn_once); \
+})
#define VM_WARN_ON(cond) (void)WARN_ON(cond)
#define VM_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond)
@@ -90,6 +103,7 @@ void dump_mm(const struct mm_struct *mm);
#define VM_WARN_ON_ONCE_PAGE(cond, page) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN_ON_FOLIO(cond, folio) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN_ON_ONCE_FOLIO(cond, folio) BUILD_BUG_ON_INVALID(cond)
+#define VM_WARN_ON_ONCE_MM(cond, mm) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
#endif
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 6c1c2fc13017..5e50b78d58ea 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -105,6 +105,9 @@ extern int page_group_by_mobility_disabled;
#define get_pageblock_migratetype(page) \
get_pfnblock_flags_mask(page, page_to_pfn(page), MIGRATETYPE_MASK)
+#define folio_migratetype(folio) \
+ get_pfnblock_flags_mask(&folio->page, folio_pfn(folio), \
+ MIGRATETYPE_MASK)
struct free_area {
struct list_head free_list[MIGRATE_TYPES];
unsigned long nr_free;
@@ -293,9 +296,21 @@ static inline bool is_active_lru(enum lru_list lru)
#define ANON_AND_FILE 2
enum lruvec_flags {
- LRUVEC_CONGESTED, /* lruvec has many dirty pages
- * backed by a congested BDI
- */
+ /*
+ * An lruvec has many dirty pages backed by a congested BDI:
+ * 1. LRUVEC_CGROUP_CONGESTED is set by cgroup-level reclaim.
+ * It can be cleared by cgroup reclaim or kswapd.
+ * 2. LRUVEC_NODE_CONGESTED is set by kswapd node-level reclaim.
+ * It can only be cleared by kswapd.
+ *
+ * Essentially, kswapd can unthrottle an lruvec throttled by cgroup
+ * reclaim, but not vice versa. This only applies to the root cgroup.
+ * The goal is to prevent cgroup reclaim on the root cgroup (e.g.
+ * memory.reclaim) to unthrottle an unbalanced node (that was throttled
+ * by kswapd).
+ */
+ LRUVEC_CGROUP_CONGESTED,
+ LRUVEC_NODE_CONGESTED,
};
#endif /* !__GENERATING_BOUNDS_H */
@@ -537,7 +552,7 @@ void lru_gen_exit_memcg(struct mem_cgroup *memcg);
void lru_gen_online_memcg(struct mem_cgroup *memcg);
void lru_gen_offline_memcg(struct mem_cgroup *memcg);
void lru_gen_release_memcg(struct mem_cgroup *memcg);
-void lru_gen_soft_reclaim(struct lruvec *lruvec);
+void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid);
#else /* !CONFIG_MEMCG */
@@ -588,7 +603,7 @@ static inline void lru_gen_release_memcg(struct mem_cgroup *memcg)
{
}
-static inline void lru_gen_soft_reclaim(struct lruvec *lruvec)
+static inline void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid)
{
}
@@ -1124,6 +1139,11 @@ static inline bool is_zone_movable_page(const struct page *page)
{
return page_zonenum(page) == ZONE_MOVABLE;
}
+
+static inline bool folio_is_zone_movable(const struct folio *folio)
+{
+ return folio_zonenum(folio) == ZONE_MOVABLE;
+}
#endif
/*
@@ -1520,27 +1540,6 @@ static inline bool has_managed_dma(void)
}
#endif
-/* These two functions are used to setup the per zone pages min values */
-struct ctl_table;
-
-int min_free_kbytes_sysctl_handler(struct ctl_table *, int, void *, size_t *,
- loff_t *);
-int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, void *,
- size_t *, loff_t *);
-extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES];
-int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, void *,
- size_t *, loff_t *);
-int percpu_pagelist_high_fraction_sysctl_handler(struct ctl_table *, int,
- void *, size_t *, loff_t *);
-int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
- void *, size_t *, loff_t *);
-int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
- void *, size_t *, loff_t *);
-int numa_zonelist_order_handler(struct ctl_table *, int,
- void *, size_t *, loff_t *);
-extern int percpu_pagelist_high_fraction;
-extern char numa_zonelist_order[];
-#define NUMA_ZONELIST_ORDER_LEN 16
#ifndef CONFIG_NUMA
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index 5456b7be38ae..4ac34392823a 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -37,27 +37,12 @@ void set_pageblock_migratetype(struct page *page, int migratetype);
int move_freepages_block(struct zone *zone, struct page *page,
int migratetype, int *num_movable);
-/*
- * Changes migrate type in [start_pfn, end_pfn) to be MIGRATE_ISOLATE.
- */
-int
-start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
- int migratetype, int flags, gfp_t gfp_flags);
+int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
+ int migratetype, int flags, gfp_t gfp_flags);
-/*
- * Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE.
- * target range is [start_pfn, end_pfn)
- */
-void
-undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
- int migratetype);
+void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
+ int migratetype);
-/*
- * Test all pages in [start_pfn, end_pfn) are isolated or not.
- */
int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
int isol_flags);
-
-struct page *alloc_migrate_target(struct page *page, unsigned long private);
-
#endif
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index a56308a9d1a4..716953ee1ebd 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -30,6 +30,9 @@ static inline void invalidate_remote_inode(struct inode *inode)
int invalidate_inode_pages2(struct address_space *mapping);
int invalidate_inode_pages2_range(struct address_space *mapping,
pgoff_t start, pgoff_t end);
+int kiocb_invalidate_pages(struct kiocb *iocb, size_t count);
+void kiocb_invalidate_post_direct_write(struct kiocb *iocb, size_t count);
+
int write_inode_now(struct inode *, int sync);
int filemap_fdatawrite(struct address_space *);
int filemap_flush(struct address_space *);
@@ -54,6 +57,7 @@ int filemap_check_errors(struct address_space *mapping);
void __filemap_set_wb_err(struct address_space *mapping, int err);
int filemap_fdatawrite_wbc(struct address_space *mapping,
struct writeback_control *wbc);
+int kiocb_write_and_wait(struct kiocb *iocb, size_t count);
static inline int filemap_write_and_wait(struct address_space *mapping)
{
@@ -1078,8 +1082,6 @@ int filemap_migrate_folio(struct address_space *mapping, struct folio *dst,
#else
#define filemap_migrate_folio NULL
#endif
-void page_endio(struct page *page, bool is_write, int err);
-
void folio_end_private_2(struct folio *folio);
void folio_wait_private_2(struct folio *folio);
int folio_wait_private_2_killable(struct folio *folio);
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index f582f7213ea5..87cc678adc85 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -3,65 +3,18 @@
* include/linux/pagevec.h
*
* In many places it is efficient to batch an operation up against multiple
- * pages. A pagevec is a multipage container which is used for that.
+ * folios. A folio_batch is a container which is used for that.
*/
#ifndef _LINUX_PAGEVEC_H
#define _LINUX_PAGEVEC_H
-#include <linux/xarray.h>
+#include <linux/types.h>
-/* 15 pointers + header align the pagevec structure to a power of two */
+/* 15 pointers + header align the folio_batch structure to a power of two */
#define PAGEVEC_SIZE 15
-struct page;
struct folio;
-struct address_space;
-
-/* Layout must match folio_batch */
-struct pagevec {
- unsigned char nr;
- bool percpu_pvec_drained;
- struct page *pages[PAGEVEC_SIZE];
-};
-
-void __pagevec_release(struct pagevec *pvec);
-
-static inline void pagevec_init(struct pagevec *pvec)
-{
- pvec->nr = 0;
- pvec->percpu_pvec_drained = false;
-}
-
-static inline void pagevec_reinit(struct pagevec *pvec)
-{
- pvec->nr = 0;
-}
-
-static inline unsigned pagevec_count(struct pagevec *pvec)
-{
- return pvec->nr;
-}
-
-static inline unsigned pagevec_space(struct pagevec *pvec)
-{
- return PAGEVEC_SIZE - pvec->nr;
-}
-
-/*
- * Add a page to a pagevec. Returns the number of slots still available.
- */
-static inline unsigned pagevec_add(struct pagevec *pvec, struct page *page)
-{
- pvec->pages[pvec->nr++] = page;
- return pagevec_space(pvec);
-}
-
-static inline void pagevec_release(struct pagevec *pvec)
-{
- if (pagevec_count(pvec))
- __pagevec_release(pvec);
-}
/**
* struct folio_batch - A collection of folios.
@@ -78,11 +31,6 @@ struct folio_batch {
struct folio *folios[PAGEVEC_SIZE];
};
-/* Layout must match pagevec */
-static_assert(sizeof(struct pagevec) == sizeof(struct folio_batch));
-static_assert(offsetof(struct pagevec, pages) ==
- offsetof(struct folio_batch, folios));
-
/**
* folio_batch_init() - Initialise a batch of folios
* @fbatch: The folio batch.
@@ -105,7 +53,7 @@ static inline unsigned int folio_batch_count(struct folio_batch *fbatch)
return fbatch->nr;
}
-static inline unsigned int fbatch_space(struct folio_batch *fbatch)
+static inline unsigned int folio_batch_space(struct folio_batch *fbatch)
{
return PAGEVEC_SIZE - fbatch->nr;
}
@@ -124,12 +72,15 @@ static inline unsigned folio_batch_add(struct folio_batch *fbatch,
struct folio *folio)
{
fbatch->folios[fbatch->nr++] = folio;
- return fbatch_space(fbatch);
+ return folio_batch_space(fbatch);
}
+void __folio_batch_release(struct folio_batch *pvec);
+
static inline void folio_batch_release(struct folio_batch *fbatch)
{
- pagevec_release((struct pagevec *)fbatch);
+ if (folio_batch_count(fbatch))
+ __folio_batch_release(fbatch);
}
void folio_batch_remove_exceptionals(struct folio_batch *fbatch);
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index c5a51481bbb9..5063b482e34f 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -94,14 +94,22 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
#define pte_offset_kernel pte_offset_kernel
#endif
-#if defined(CONFIG_HIGHPTE)
-#define pte_offset_map(dir, address) \
- ((pte_t *)kmap_atomic(pmd_page(*(dir))) + \
- pte_index((address)))
-#define pte_unmap(pte) kunmap_atomic((pte))
+#ifdef CONFIG_HIGHPTE
+#define __pte_map(pmd, address) \
+ ((pte_t *)kmap_local_page(pmd_page(*(pmd))) + pte_index((address)))
+#define pte_unmap(pte) do { \
+ kunmap_local((pte)); \
+ /* rcu_read_unlock() to be added later */ \
+} while (0)
#else
-#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address))
-#define pte_unmap(pte) ((void)(pte)) /* NOP */
+static inline pte_t *__pte_map(pmd_t *pmd, unsigned long address)
+{
+ return pte_offset_kernel(pmd, address);
+}
+static inline void pte_unmap(pte_t *pte)
+{
+ /* rcu_read_unlock() to be added later */
+}
#endif
/* Find an entry in the second-level page table.. */
@@ -204,12 +212,26 @@ static inline int pudp_set_access_flags(struct vm_area_struct *vma,
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif
+#ifndef ptep_get
+static inline pte_t ptep_get(pte_t *ptep)
+{
+ return READ_ONCE(*ptep);
+}
+#endif
+
+#ifndef pmdp_get
+static inline pmd_t pmdp_get(pmd_t *pmdp)
+{
+ return READ_ONCE(*pmdp);
+}
+#endif
+
#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
unsigned long address,
pte_t *ptep)
{
- pte_t pte = *ptep;
+ pte_t pte = ptep_get(ptep);
int r = 1;
if (!pte_young(pte))
r = 0;
@@ -296,7 +318,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
unsigned long address,
pte_t *ptep)
{
- pte_t pte = *ptep;
+ pte_t pte = ptep_get(ptep);
pte_clear(mm, address, ptep);
page_table_check_pte_clear(mm, address, pte);
return pte;
@@ -309,20 +331,6 @@ static inline void ptep_clear(struct mm_struct *mm, unsigned long addr,
ptep_get_and_clear(mm, addr, ptep);
}
-#ifndef ptep_get
-static inline pte_t ptep_get(pte_t *ptep)
-{
- return READ_ONCE(*ptep);
-}
-#endif
-
-#ifndef pmdp_get
-static inline pmd_t pmdp_get(pmd_t *pmdp)
-{
- return READ_ONCE(*pmdp);
-}
-#endif
-
#ifdef CONFIG_GUP_GET_PXX_LOW_HIGH
/*
* For walking the pagetables without holding any locks. Some architectures
@@ -511,7 +519,7 @@ extern pud_t pudp_huge_clear_flush(struct vm_area_struct *vma,
struct mm_struct;
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
{
- pte_t old_pte = *ptep;
+ pte_t old_pte = ptep_get(ptep);
set_pte_at(mm, address, ptep, pte_wrprotect(old_pte));
}
#endif
@@ -591,6 +599,10 @@ extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
#endif
+#ifndef arch_needs_pgtable_deposit
+#define arch_needs_pgtable_deposit() (false)
+#endif
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
* This is an implementation of pmdp_establish() that is only suitable for an
@@ -1292,9 +1304,10 @@ static inline int pud_trans_huge(pud_t pud)
}
#endif
-/* See pmd_none_or_trans_huge_or_clear_bad for discussion. */
-static inline int pud_none_or_trans_huge_or_dev_or_clear_bad(pud_t *pud)
+static inline int pud_trans_unstable(pud_t *pud)
{
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
+ defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
pud_t pudval = READ_ONCE(*pud);
if (pud_none(pudval) || pud_trans_huge(pudval) || pud_devmap(pudval))
@@ -1303,121 +1316,10 @@ static inline int pud_none_or_trans_huge_or_dev_or_clear_bad(pud_t *pud)
pud_clear_bad(pud);
return 1;
}
- return 0;
-}
-
-/* See pmd_trans_unstable for discussion. */
-static inline int pud_trans_unstable(pud_t *pud)
-{
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
- defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
- return pud_none_or_trans_huge_or_dev_or_clear_bad(pud);
-#else
- return 0;
-#endif
-}
-
-#ifndef arch_needs_pgtable_deposit
-#define arch_needs_pgtable_deposit() (false)
-#endif
-/*
- * This function is meant to be used by sites walking pagetables with
- * the mmap_lock held in read mode to protect against MADV_DONTNEED and
- * transhuge page faults. MADV_DONTNEED can convert a transhuge pmd
- * into a null pmd and the transhuge page fault can convert a null pmd
- * into an hugepmd or into a regular pmd (if the hugepage allocation
- * fails). While holding the mmap_lock in read mode the pmd becomes
- * stable and stops changing under us only if it's not null and not a
- * transhuge pmd. When those races occurs and this function makes a
- * difference vs the standard pmd_none_or_clear_bad, the result is
- * undefined so behaving like if the pmd was none is safe (because it
- * can return none anyway). The compiler level barrier() is critically
- * important to compute the two checks atomically on the same pmdval.
- *
- * For 32bit kernels with a 64bit large pmd_t this automatically takes
- * care of reading the pmd atomically to avoid SMP race conditions
- * against pmd_populate() when the mmap_lock is hold for reading by the
- * caller (a special atomic read not done by "gcc" as in the generic
- * version above, is also needed when THP is disabled because the page
- * fault can populate the pmd from under us).
- */
-static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
-{
- pmd_t pmdval = pmdp_get_lockless(pmd);
- /*
- * The barrier will stabilize the pmdval in a register or on
- * the stack so that it will stop changing under the code.
- *
- * When CONFIG_TRANSPARENT_HUGEPAGE=y on x86 32bit PAE,
- * pmdp_get_lockless is allowed to return a not atomic pmdval
- * (for example pointing to an hugepage that has never been
- * mapped in the pmd). The below checks will only care about
- * the low part of the pmd with 32bit PAE x86 anyway, with the
- * exception of pmd_none(). So the important thing is that if
- * the low part of the pmd is found null, the high part will
- * be also null or the pmd_none() check below would be
- * confused.
- */
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- barrier();
#endif
- /*
- * !pmd_present() checks for pmd migration entries
- *
- * The complete check uses is_pmd_migration_entry() in linux/swapops.h
- * But using that requires moving current function and pmd_trans_unstable()
- * to linux/swapops.h to resolve dependency, which is too much code move.
- *
- * !pmd_present() is equivalent to is_pmd_migration_entry() currently,
- * because !pmd_present() pages can only be under migration not swapped
- * out.
- *
- * pmd_none() is preserved for future condition checks on pmd migration
- * entries and not confusing with this function name, although it is
- * redundant with !pmd_present().
- */
- if (pmd_none(pmdval) || pmd_trans_huge(pmdval) ||
- (IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION) && !pmd_present(pmdval)))
- return 1;
- if (unlikely(pmd_bad(pmdval))) {
- pmd_clear_bad(pmd);
- return 1;
- }
return 0;
}
-/*
- * This is a noop if Transparent Hugepage Support is not built into
- * the kernel. Otherwise it is equivalent to
- * pmd_none_or_trans_huge_or_clear_bad(), and shall only be called in
- * places that already verified the pmd is not none and they want to
- * walk ptes while holding the mmap sem in read mode (write mode don't
- * need this). If THP is not enabled, the pmd can't go away under the
- * code even if MADV_DONTNEED runs, but if THP is enabled we need to
- * run a pmd_trans_unstable before walking the ptes after
- * split_huge_pmd returns (because it may have run when the pmd become
- * null, but then a page fault can map in a THP and not a regular page).
- */
-static inline int pmd_trans_unstable(pmd_t *pmd)
-{
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- return pmd_none_or_trans_huge_or_clear_bad(pmd);
-#else
- return 0;
-#endif
-}
-
-/*
- * the ordering of these checks is important for pmds with _page_devmap set.
- * if we check pmd_trans_unstable() first we will trip the bad_pmd() check
- * inside of pmd_none_or_trans_huge_or_clear_bad(). this will end up correctly
- * returning 1 but not before it spams dmesg with the pmd_clear_bad() output.
- */
-static inline int pmd_devmap_trans_unstable(pmd_t *pmd)
-{
- return pmd_devmap(*pmd) || pmd_trans_unstable(pmd);
-}
-
#ifndef CONFIG_NUMA_BALANCING
/*
* Technically a PTE can be PROTNONE even when not doing NUMA balancing but
diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h
index 917528d102c4..d506dc63dd47 100644
--- a/include/linux/ramfs.h
+++ b/include/linux/ramfs.h
@@ -7,6 +7,7 @@
struct inode *ramfs_get_inode(struct super_block *sb, const struct inode *dir,
umode_t mode, dev_t dev);
extern int ramfs_init_fs_context(struct fs_context *fc);
+extern void ramfs_kill_sb(struct super_block *sb);
#ifdef CONFIG_MMU
static inline int
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 375a5e90d86a..77df3d7b18a6 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -16,7 +16,7 @@ struct scatterlist {
#ifdef CONFIG_NEED_SG_DMA_LENGTH
unsigned int dma_length;
#endif
-#ifdef CONFIG_PCI_P2PDMA
+#ifdef CONFIG_NEED_SG_DMA_FLAGS
unsigned int dma_flags;
#endif
};
@@ -141,6 +141,30 @@ static inline void sg_set_page(struct scatterlist *sg, struct page *page,
sg->length = len;
}
+/**
+ * sg_set_folio - Set sg entry to point at given folio
+ * @sg: SG entry
+ * @folio: The folio
+ * @len: Length of data
+ * @offset: Offset into folio
+ *
+ * Description:
+ * Use this function to set an sg entry pointing at a folio, never assign
+ * the folio directly. We encode sg table information in the lower bits
+ * of the folio pointer. See sg_page() for looking up the page belonging
+ * to an sg entry.
+ *
+ **/
+static inline void sg_set_folio(struct scatterlist *sg, struct folio *folio,
+ size_t len, size_t offset)
+{
+ WARN_ON_ONCE(len > UINT_MAX);
+ WARN_ON_ONCE(offset > UINT_MAX);
+ sg_assign_page(sg, &folio->page);
+ sg->offset = offset;
+ sg->length = len;
+}
+
static inline struct page *sg_page(struct scatterlist *sg)
{
#ifdef CONFIG_DEBUG_SG
@@ -249,17 +273,18 @@ static inline void sg_unmark_end(struct scatterlist *sg)
}
/*
- * CONFGI_PCI_P2PDMA depends on CONFIG_64BIT which means there is 4 bytes
- * in struct scatterlist (assuming also CONFIG_NEED_SG_DMA_LENGTH is set).
- * Use this padding for DMA flags bits to indicate when a specific
- * dma address is a bus address.
+ * One 64-bit architectures there is a 4-byte padding in struct scatterlist
+ * (assuming also CONFIG_NEED_SG_DMA_LENGTH is set). Use this padding for DMA
+ * flags bits to indicate when a specific dma address is a bus address or the
+ * buffer may have been bounced via SWIOTLB.
*/
-#ifdef CONFIG_PCI_P2PDMA
+#ifdef CONFIG_NEED_SG_DMA_FLAGS
-#define SG_DMA_BUS_ADDRESS (1 << 0)
+#define SG_DMA_BUS_ADDRESS (1 << 0)
+#define SG_DMA_SWIOTLB (1 << 1)
/**
- * sg_dma_is_bus address - Return whether a given segment was marked
+ * sg_dma_is_bus_address - Return whether a given segment was marked
* as a bus address
* @sg: SG entry
*
@@ -267,13 +292,13 @@ static inline void sg_unmark_end(struct scatterlist *sg)
* Returns true if sg_dma_mark_bus_address() has been called on
* this segment.
**/
-static inline bool sg_is_dma_bus_address(struct scatterlist *sg)
+static inline bool sg_dma_is_bus_address(struct scatterlist *sg)
{
return sg->dma_flags & SG_DMA_BUS_ADDRESS;
}
/**
- * sg_dma_mark_bus address - Mark the scatterlist entry as a bus address
+ * sg_dma_mark_bus_address - Mark the scatterlist entry as a bus address
* @sg: SG entry
*
* Description:
@@ -299,9 +324,37 @@ static inline void sg_dma_unmark_bus_address(struct scatterlist *sg)
sg->dma_flags &= ~SG_DMA_BUS_ADDRESS;
}
+/**
+ * sg_dma_is_swiotlb - Return whether the scatterlist was marked for SWIOTLB
+ * bouncing
+ * @sg: SG entry
+ *
+ * Description:
+ * Returns true if the scatterlist was marked for SWIOTLB bouncing. Not all
+ * elements may have been bounced, so the caller would have to check
+ * individual SG entries with is_swiotlb_buffer().
+ */
+static inline bool sg_dma_is_swiotlb(struct scatterlist *sg)
+{
+ return sg->dma_flags & SG_DMA_SWIOTLB;
+}
+
+/**
+ * sg_dma_mark_swiotlb - Mark the scatterlist for SWIOTLB bouncing
+ * @sg: SG entry
+ *
+ * Description:
+ * Marks a a scatterlist for SWIOTLB bounce. Not all SG entries may be
+ * bounced.
+ */
+static inline void sg_dma_mark_swiotlb(struct scatterlist *sg)
+{
+ sg->dma_flags |= SG_DMA_SWIOTLB;
+}
+
#else
-static inline bool sg_is_dma_bus_address(struct scatterlist *sg)
+static inline bool sg_dma_is_bus_address(struct scatterlist *sg)
{
return false;
}
@@ -311,8 +364,15 @@ static inline void sg_dma_mark_bus_address(struct scatterlist *sg)
static inline void sg_dma_unmark_bus_address(struct scatterlist *sg)
{
}
+static inline bool sg_dma_is_swiotlb(struct scatterlist *sg)
+{
+ return false;
+}
+static inline void sg_dma_mark_swiotlb(struct scatterlist *sg)
+{
+}
-#endif
+#endif /* CONFIG_NEED_SG_DMA_FLAGS */
/**
* sg_phys - Return physical address of an sg entry
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b0011c50da4f..609bde814cb0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -41,7 +41,6 @@
/* task_struct member predeclarations (sorted alphabetically): */
struct audit_context;
-struct backing_dev_info;
struct bio_list;
struct blk_plug;
struct bpf_local_storage;
@@ -1186,8 +1185,6 @@ struct task_struct {
/* VM state: */
struct reclaim_state *reclaim_state;
- struct backing_dev_info *backing_dev_info;
-
struct io_context *io_context;
#ifdef CONFIG_COMPACTION
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 6b3e155b70bf..ca53425e9b32 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -12,6 +12,7 @@
#ifndef _LINUX_SLAB_H
#define _LINUX_SLAB_H
+#include <linux/cache.h>
#include <linux/gfp.h>
#include <linux/overflow.h>
#include <linux/types.h>
@@ -235,12 +236,17 @@ void kmem_dump_obj(void *object);
* alignment larger than the alignment of a 64-bit integer.
* Setting ARCH_DMA_MINALIGN in arch headers allows that.
*/
-#if defined(ARCH_DMA_MINALIGN) && ARCH_DMA_MINALIGN > 8
+#ifdef ARCH_HAS_DMA_MINALIGN
+#if ARCH_DMA_MINALIGN > 8 && !defined(ARCH_KMALLOC_MINALIGN)
#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
-#define KMALLOC_MIN_SIZE ARCH_DMA_MINALIGN
-#define KMALLOC_SHIFT_LOW ilog2(ARCH_DMA_MINALIGN)
-#else
+#endif
+#endif
+
+#ifndef ARCH_KMALLOC_MINALIGN
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
+#elif ARCH_KMALLOC_MINALIGN > 8
+#define KMALLOC_MIN_SIZE ARCH_KMALLOC_MINALIGN
+#define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE)
#endif
/*
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 3b10636c51a9..b4903b87362a 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -223,7 +223,7 @@ struct svc_rqst {
struct page * *rq_next_page; /* next reply page to use */
struct page * *rq_page_end; /* one past the last page */
- struct pagevec rq_pvec;
+ struct folio_batch rq_fbatch;
struct kvec rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */
struct bio_vec rq_bvec[RPCSVC_MAXPAGES];
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 4d0095e8989e..ef503088942d 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -367,9 +367,6 @@ struct pbe {
struct pbe *next;
};
-/* mm/page_alloc.c */
-extern void mark_free_pages(struct zone *zone);
-
/**
* struct platform_hibernation_ops - hibernation platform support
*
@@ -513,6 +510,11 @@ extern void pm_report_max_hw_sleep(u64 t);
/* drivers/base/power/wakeup.c */
extern bool events_check_enabled;
+static inline bool pm_suspended_storage(void)
+{
+ return !gfp_has_io_fs(gfp_allowed_mask);
+}
+
extern bool pm_wakeup_pending(void);
extern void pm_system_wakeup(void);
extern void pm_system_cancel_wakeup(void);
@@ -546,6 +548,7 @@ static inline void ksys_sync_helper(void) {}
#define pm_notifier(fn, pri) do { (void)(fn); } while (0)
+static inline bool pm_suspended_storage(void) { return false; }
static inline bool pm_wakeup_pending(void) { return false; }
static inline void pm_system_wakeup(void) {}
static inline void pm_wakeup_clear(bool reset) {}
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 3c69cb653cb9..456546443f1f 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -337,25 +337,6 @@ struct swap_info_struct {
*/
};
-#ifdef CONFIG_64BIT
-#define SWAP_RA_ORDER_CEILING 5
-#else
-/* Avoid stack overflow, because we need to save part of page table */
-#define SWAP_RA_ORDER_CEILING 3
-#define SWAP_RA_PTE_CACHE_SIZE (1 << SWAP_RA_ORDER_CEILING)
-#endif
-
-struct vma_swap_readahead {
- unsigned short win;
- unsigned short offset;
- unsigned short nr_pte;
-#ifdef CONFIG_64BIT
- pte_t *ptes;
-#else
- pte_t ptes[SWAP_RA_PTE_CACHE_SIZE];
-#endif
-};
-
static inline swp_entry_t folio_swap_entry(struct folio *folio)
{
swp_entry_t entry = { .val = page_private(&folio->page) };
@@ -368,6 +349,7 @@ static inline void folio_set_swap_entry(struct folio *folio, swp_entry_t entry)
}
/* linux/mm/workingset.c */
+bool workingset_test_recent(void *shadow, bool file, bool *workingset);
void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages);
void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg);
void workingset_refault(struct folio *folio, void *shadow);
@@ -457,10 +439,9 @@ static inline bool node_reclaim_enabled(void)
}
void check_move_unevictable_folios(struct folio_batch *fbatch);
-void check_move_unevictable_pages(struct pagevec *pvec);
-extern void kswapd_run(int nid);
-extern void kswapd_stop(int nid);
+extern void __meminit kswapd_run(int nid);
+extern void __meminit kswapd_stop(int nid);
#ifdef CONFIG_SWAP
@@ -512,7 +493,7 @@ int find_first_swap(dev_t *device);
extern unsigned int count_swap_pages(int, int);
extern sector_t swapdev_block(int, pgoff_t);
extern int __swap_count(swp_entry_t entry);
-extern int __swp_swapcount(swp_entry_t entry);
+extern int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry);
extern int swp_swapcount(swp_entry_t entry);
extern struct swap_info_struct *page_swap_info(struct page *);
extern struct swap_info_struct *swp_swap_info(swp_entry_t entry);
@@ -590,7 +571,7 @@ static inline int __swap_count(swp_entry_t entry)
return 0;
}
-static inline int __swp_swapcount(swp_entry_t entry)
+static inline int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry)
{
return 0;
}
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 3a451b7afcb3..4c932cb45e0b 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -332,15 +332,9 @@ static inline bool is_migration_entry_dirty(swp_entry_t entry)
return false;
}
-extern void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
- spinlock_t *ptl);
extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
unsigned long address);
-#ifdef CONFIG_HUGETLB_PAGE
-extern void __migration_entry_wait_huge(struct vm_area_struct *vma,
- pte_t *ptep, spinlock_t *ptl);
extern void migration_entry_wait_huge(struct vm_area_struct *vma, pte_t *pte);
-#endif /* CONFIG_HUGETLB_PAGE */
#else /* CONFIG_MIGRATION */
static inline swp_entry_t make_readable_migration_entry(pgoff_t offset)
{
@@ -362,15 +356,10 @@ static inline int is_migration_entry(swp_entry_t swp)
return 0;
}
-static inline void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
- spinlock_t *ptl) { }
static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
- unsigned long address) { }
-#ifdef CONFIG_HUGETLB_PAGE
-static inline void __migration_entry_wait_huge(struct vm_area_struct *vma,
- pte_t *ptep, spinlock_t *ptl) { }
-static inline void migration_entry_wait_huge(struct vm_area_struct *vma, pte_t *pte) { }
-#endif /* CONFIG_HUGETLB_PAGE */
+ unsigned long address) { }
+static inline void migration_entry_wait_huge(struct vm_area_struct *vma,
+ pte_t *pte) { }
static inline int is_writable_migration_entry(swp_entry_t entry)
{
return 0;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 24871f8ec8bb..d18ce144037e 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -72,6 +72,8 @@ struct open_how;
struct mount_attr;
struct landlock_ruleset_attr;
enum landlock_rule_type;
+struct cachestat_range;
+struct cachestat;
#include <linux/types.h>
#include <linux/aio_abi.h>
@@ -1058,6 +1060,9 @@ asmlinkage long sys_memfd_secret(unsigned int flags);
asmlinkage long sys_set_mempolicy_home_node(unsigned long start, unsigned long len,
unsigned long home_node,
unsigned long flags);
+asmlinkage long sys_cachestat(unsigned int fd,
+ struct cachestat_range __user *cstat_range,
+ struct cachestat __user *cstat, unsigned int flags);
/*
* Architecture-specific system calls
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index d78b01524349..ac7b0c96d351 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -188,8 +188,8 @@ extern bool userfaultfd_remove(struct vm_area_struct *vma,
unsigned long start,
unsigned long end);
-extern int userfaultfd_unmap_prep(struct mm_struct *mm, unsigned long start,
- unsigned long end, struct list_head *uf);
+extern int userfaultfd_unmap_prep(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end, struct list_head *uf);
extern void userfaultfd_unmap_complete(struct mm_struct *mm,
struct list_head *uf);
extern bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma);
@@ -271,7 +271,7 @@ static inline bool userfaultfd_remove(struct vm_area_struct *vma,
return true;
}
-static inline int userfaultfd_unmap_prep(struct mm_struct *mm,
+static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
struct list_head *uf)
{
diff --git a/include/linux/zpool.h b/include/linux/zpool.h
index e8997010612a..3296438eec06 100644
--- a/include/linux/zpool.h
+++ b/include/linux/zpool.h
@@ -14,10 +14,6 @@
struct zpool;
-struct zpool_ops {
- int (*evict)(struct zpool *pool, unsigned long handle);
-};
-
/*
* Control how a handle is mapped. It will be ignored if the
* implementation does not support it. Its use is optional.
@@ -39,8 +35,7 @@ enum zpool_mapmode {
bool zpool_has_pool(char *type);
-struct zpool *zpool_create_pool(const char *type, const char *name,
- gfp_t gfp, const struct zpool_ops *ops);
+struct zpool *zpool_create_pool(const char *type, const char *name, gfp_t gfp);
const char *zpool_get_type(struct zpool *pool);
@@ -53,9 +48,6 @@ int zpool_malloc(struct zpool *pool, size_t size, gfp_t gfp,
void zpool_free(struct zpool *pool, unsigned long handle);
-int zpool_shrink(struct zpool *pool, unsigned int pages,
- unsigned int *reclaimed);
-
void *zpool_map_handle(struct zpool *pool, unsigned long handle,
enum zpool_mapmode mm);
@@ -72,7 +64,6 @@ u64 zpool_get_total_size(struct zpool *pool);
* @destroy: destroy a pool.
* @malloc: allocate mem from a pool.
* @free: free mem from a pool.
- * @shrink: shrink the pool.
* @sleep_mapped: whether zpool driver can sleep during map.
* @map: map a handle.
* @unmap: unmap a handle.
@@ -87,10 +78,7 @@ struct zpool_driver {
atomic_t refcount;
struct list_head list;
- void *(*create)(const char *name,
- gfp_t gfp,
- const struct zpool_ops *ops,
- struct zpool *zpool);
+ void *(*create)(const char *name, gfp_t gfp);
void (*destroy)(void *pool);
bool malloc_support_movable;
@@ -98,9 +86,6 @@ struct zpool_driver {
unsigned long *handle);
void (*free)(void *pool, unsigned long handle);
- int (*shrink)(void *pool, unsigned int pages,
- unsigned int *reclaimed);
-
bool sleep_mapped;
void *(*map)(void *pool, unsigned long handle,
enum zpool_mapmode mm);
@@ -113,7 +98,6 @@ void zpool_register_driver(struct zpool_driver *driver);
int zpool_unregister_driver(struct zpool_driver *driver);
-bool zpool_evictable(struct zpool *pool);
bool zpool_can_sleep_mapped(struct zpool *pool);
#endif