summaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/Makefile2
-rw-r--r--mm/cma.c12
-rw-r--r--mm/huge_memory.c25
-rw-r--r--mm/hugetlb.c4
-rw-r--r--mm/iov_iter.c753
-rw-r--r--mm/kasan/kasan.c14
-rw-r--r--mm/memcontrol.c20
-rw-r--r--mm/memory.c17
-rw-r--r--mm/memory_hotplug.c13
-rw-r--r--mm/mlock.c4
-rw-r--r--mm/mmap.c4
-rw-r--r--mm/mprotect.c3
-rw-r--r--mm/mremap.c10
-rw-r--r--mm/nommu.c5
-rw-r--r--mm/page-writeback.c7
-rw-r--r--mm/page_alloc.c12
-rw-r--r--mm/page_isolation.c1
-rw-r--r--mm/pagewalk.c9
-rw-r--r--mm/rmap.c7
-rw-r--r--mm/shmem.c3
-rw-r--r--mm/slub.c6
-rw-r--r--mm/vmalloc.c1
22 files changed, 119 insertions, 813 deletions
diff --git a/mm/Makefile b/mm/Makefile
index 3c1caa2693bd..15dbe9903c27 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -21,7 +21,7 @@ obj-y := filemap.o mempool.o oom_kill.o \
mm_init.o mmu_context.o percpu.o slab_common.o \
compaction.o vmacache.o \
interval_tree.o list_lru.o workingset.o \
- iov_iter.o debug.o $(mmu-y)
+ debug.o $(mmu-y)
obj-y += init-mm.o
diff --git a/mm/cma.c b/mm/cma.c
index 75016fd1de90..68ecb7a42983 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -64,15 +64,17 @@ static unsigned long cma_bitmap_aligned_mask(struct cma *cma, int align_order)
return (1UL << (align_order - cma->order_per_bit)) - 1;
}
+/*
+ * Find a PFN aligned to the specified order and return an offset represented in
+ * order_per_bits.
+ */
static unsigned long cma_bitmap_aligned_offset(struct cma *cma, int align_order)
{
- unsigned int alignment;
-
if (align_order <= cma->order_per_bit)
return 0;
- alignment = 1UL << (align_order - cma->order_per_bit);
- return ALIGN(cma->base_pfn, alignment) -
- (cma->base_pfn >> cma->order_per_bit);
+
+ return (ALIGN(cma->base_pfn, (1UL << align_order))
+ - cma->base_pfn) >> cma->order_per_bit;
}
static unsigned long cma_bitmap_maxno(struct cma *cma)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index fc00c8cb5a82..6817b0350c71 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1260,6 +1260,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
int target_nid, last_cpupid = -1;
bool page_locked;
bool migrated = false;
+ bool was_writable;
int flags = 0;
/* A PROT_NONE fault should not end up here */
@@ -1291,12 +1292,8 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
flags |= TNF_FAULT_LOCAL;
}
- /*
- * Avoid grouping on DSO/COW pages in specific and RO pages
- * in general, RO pages shouldn't hurt as much anyway since
- * they can be in shared cache state.
- */
- if (!pmd_write(pmd))
+ /* See similar comment in do_numa_page for explanation */
+ if (!(vma->vm_flags & VM_WRITE))
flags |= TNF_NO_GROUP;
/*
@@ -1353,12 +1350,17 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (migrated) {
flags |= TNF_MIGRATED;
page_nid = target_nid;
- }
+ } else
+ flags |= TNF_MIGRATE_FAIL;
goto out;
clear_pmdnuma:
BUG_ON(!PageLocked(page));
+ was_writable = pmd_write(pmd);
pmd = pmd_modify(pmd, vma->vm_page_prot);
+ pmd = pmd_mkyoung(pmd);
+ if (was_writable)
+ pmd = pmd_mkwrite(pmd);
set_pmd_at(mm, haddr, pmdp, pmd);
update_mmu_cache_pmd(vma, addr, pmdp);
unlock_page(page);
@@ -1482,6 +1484,8 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
pmd_t entry;
+ bool preserve_write = prot_numa && pmd_write(*pmd);
+ ret = 1;
/*
* Avoid trapping faults against the zero page. The read-only
@@ -1490,16 +1494,17 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
*/
if (prot_numa && is_huge_zero_pmd(*pmd)) {
spin_unlock(ptl);
- return 0;
+ return ret;
}
if (!prot_numa || !pmd_protnone(*pmd)) {
- ret = 1;
entry = pmdp_get_and_clear_notify(mm, addr, pmd);
entry = pmd_modify(entry, newprot);
+ if (preserve_write)
+ entry = pmd_mkwrite(entry);
ret = HPAGE_PMD_NR;
set_pmd_at(mm, addr, pmd, entry);
- BUG_ON(pmd_write(entry));
+ BUG_ON(!preserve_write && pmd_write(entry));
}
spin_unlock(ptl);
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0a9ac6c26832..c41b2a0ee273 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -917,7 +917,6 @@ static void prep_compound_gigantic_page(struct page *page, unsigned long order)
__SetPageHead(page);
__ClearPageReserved(page);
for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
- __SetPageTail(p);
/*
* For gigantic hugepages allocated through bootmem at
* boot, it's safer to be consistent with the not-gigantic
@@ -933,6 +932,9 @@ static void prep_compound_gigantic_page(struct page *page, unsigned long order)
__ClearPageReserved(p);
set_page_count(p, 0);
p->first_page = page;
+ /* Make sure p->first_page is always valid for PageTail() */
+ smp_wmb();
+ __SetPageTail(p);
}
}
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
deleted file mode 100644
index 827732047da1..000000000000
--- a/mm/iov_iter.c
+++ /dev/null
@@ -1,753 +0,0 @@
-#include <linux/export.h>
-#include <linux/uio.h>
-#include <linux/pagemap.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <net/checksum.h>
-
-#define iterate_iovec(i, n, __v, __p, skip, STEP) { \
- size_t left; \
- size_t wanted = n; \
- __p = i->iov; \
- __v.iov_len = min(n, __p->iov_len - skip); \
- if (likely(__v.iov_len)) { \
- __v.iov_base = __p->iov_base + skip; \
- left = (STEP); \
- __v.iov_len -= left; \
- skip += __v.iov_len; \
- n -= __v.iov_len; \
- } else { \
- left = 0; \
- } \
- while (unlikely(!left && n)) { \
- __p++; \
- __v.iov_len = min(n, __p->iov_len); \
- if (unlikely(!__v.iov_len)) \
- continue; \
- __v.iov_base = __p->iov_base; \
- left = (STEP); \
- __v.iov_len -= left; \
- skip = __v.iov_len; \
- n -= __v.iov_len; \
- } \
- n = wanted - n; \
-}
-
-#define iterate_kvec(i, n, __v, __p, skip, STEP) { \
- size_t wanted = n; \
- __p = i->kvec; \
- __v.iov_len = min(n, __p->iov_len - skip); \
- if (likely(__v.iov_len)) { \
- __v.iov_base = __p->iov_base + skip; \
- (void)(STEP); \
- skip += __v.iov_len; \
- n -= __v.iov_len; \
- } \
- while (unlikely(n)) { \
- __p++; \
- __v.iov_len = min(n, __p->iov_len); \
- if (unlikely(!__v.iov_len)) \
- continue; \
- __v.iov_base = __p->iov_base; \
- (void)(STEP); \
- skip = __v.iov_len; \
- n -= __v.iov_len; \
- } \
- n = wanted; \
-}
-
-#define iterate_bvec(i, n, __v, __p, skip, STEP) { \
- size_t wanted = n; \
- __p = i->bvec; \
- __v.bv_len = min_t(size_t, n, __p->bv_len - skip); \
- if (likely(__v.bv_len)) { \
- __v.bv_page = __p->bv_page; \
- __v.bv_offset = __p->bv_offset + skip; \
- (void)(STEP); \
- skip += __v.bv_len; \
- n -= __v.bv_len; \
- } \
- while (unlikely(n)) { \
- __p++; \
- __v.bv_len = min_t(size_t, n, __p->bv_len); \
- if (unlikely(!__v.bv_len)) \
- continue; \
- __v.bv_page = __p->bv_page; \
- __v.bv_offset = __p->bv_offset; \
- (void)(STEP); \
- skip = __v.bv_len; \
- n -= __v.bv_len; \
- } \
- n = wanted; \
-}
-
-#define iterate_all_kinds(i, n, v, I, B, K) { \
- size_t skip = i->iov_offset; \
- if (unlikely(i->type & ITER_BVEC)) { \
- const struct bio_vec *bvec; \
- struct bio_vec v; \
- iterate_bvec(i, n, v, bvec, skip, (B)) \
- } else if (unlikely(i->type & ITER_KVEC)) { \
- const struct kvec *kvec; \
- struct kvec v; \
- iterate_kvec(i, n, v, kvec, skip, (K)) \
- } else { \
- const struct iovec *iov; \
- struct iovec v; \
- iterate_iovec(i, n, v, iov, skip, (I)) \
- } \
-}
-
-#define iterate_and_advance(i, n, v, I, B, K) { \
- size_t skip = i->iov_offset; \
- if (unlikely(i->type & ITER_BVEC)) { \
- const struct bio_vec *bvec; \
- struct bio_vec v; \
- iterate_bvec(i, n, v, bvec, skip, (B)) \
- if (skip == bvec->bv_len) { \
- bvec++; \
- skip = 0; \
- } \
- i->nr_segs -= bvec - i->bvec; \
- i->bvec = bvec; \
- } else if (unlikely(i->type & ITER_KVEC)) { \
- const struct kvec *kvec; \
- struct kvec v; \
- iterate_kvec(i, n, v, kvec, skip, (K)) \
- if (skip == kvec->iov_len) { \
- kvec++; \
- skip = 0; \
- } \
- i->nr_segs -= kvec - i->kvec; \
- i->kvec = kvec; \
- } else { \
- const struct iovec *iov; \
- struct iovec v; \
- iterate_iovec(i, n, v, iov, skip, (I)) \
- if (skip == iov->iov_len) { \
- iov++; \
- skip = 0; \
- } \
- i->nr_segs -= iov - i->iov; \
- i->iov = iov; \
- } \
- i->count -= n; \
- i->iov_offset = skip; \
-}
-
-static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
- struct iov_iter *i)
-{
- size_t skip, copy, left, wanted;
- const struct iovec *iov;
- char __user *buf;
- void *kaddr, *from;
-
- if (unlikely(bytes > i->count))
- bytes = i->count;
-
- if (unlikely(!bytes))
- return 0;
-
- wanted = bytes;
- iov = i->iov;
- skip = i->iov_offset;
- buf = iov->iov_base + skip;
- copy = min(bytes, iov->iov_len - skip);
-
- if (!fault_in_pages_writeable(buf, copy)) {
- kaddr = kmap_atomic(page);
- from = kaddr + offset;
-
- /* first chunk, usually the only one */
- left = __copy_to_user_inatomic(buf, from, copy);
- copy -= left;
- skip += copy;
- from += copy;
- bytes -= copy;
-
- while (unlikely(!left && bytes)) {
- iov++;
- buf = iov->iov_base;
- copy = min(bytes, iov->iov_len);
- left = __copy_to_user_inatomic(buf, from, copy);
- copy -= left;
- skip = copy;
- from += copy;
- bytes -= copy;
- }
- if (likely(!bytes)) {
- kunmap_atomic(kaddr);
- goto done;
- }
- offset = from - kaddr;
- buf += copy;
- kunmap_atomic(kaddr);
- copy = min(bytes, iov->iov_len - skip);
- }
- /* Too bad - revert to non-atomic kmap */
- kaddr = kmap(page);
- from = kaddr + offset;
- left = __copy_to_user(buf, from, copy);
- copy -= left;
- skip += copy;
- from += copy;
- bytes -= copy;
- while (unlikely(!left && bytes)) {
- iov++;
- buf = iov->iov_base;
- copy = min(bytes, iov->iov_len);
- left = __copy_to_user(buf, from, copy);
- copy -= left;
- skip = copy;
- from += copy;
- bytes -= copy;
- }
- kunmap(page);
-done:
- if (skip == iov->iov_len) {
- iov++;
- skip = 0;
- }
- i->count -= wanted - bytes;
- i->nr_segs -= iov - i->iov;
- i->iov = iov;
- i->iov_offset = skip;
- return wanted - bytes;
-}
-
-static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
- struct iov_iter *i)
-{
- size_t skip, copy, left, wanted;
- const struct iovec *iov;
- char __user *buf;
- void *kaddr, *to;
-
- if (unlikely(bytes > i->count))
- bytes = i->count;
-
- if (unlikely(!bytes))
- return 0;
-
- wanted = bytes;
- iov = i->iov;
- skip = i->iov_offset;
- buf = iov->iov_base + skip;
- copy = min(bytes, iov->iov_len - skip);
-
- if (!fault_in_pages_readable(buf, copy)) {
- kaddr = kmap_atomic(page);
- to = kaddr + offset;
-
- /* first chunk, usually the only one */
- left = __copy_from_user_inatomic(to, buf, copy);
- copy -= left;
- skip += copy;
- to += copy;
- bytes -= copy;
-
- while (unlikely(!left && bytes)) {
- iov++;
- buf = iov->iov_base;
- copy = min(bytes, iov->iov_len);
- left = __copy_from_user_inatomic(to, buf, copy);
- copy -= left;
- skip = copy;
- to += copy;
- bytes -= copy;
- }
- if (likely(!bytes)) {
- kunmap_atomic(kaddr);
- goto done;
- }
- offset = to - kaddr;
- buf += copy;
- kunmap_atomic(kaddr);
- copy = min(bytes, iov->iov_len - skip);
- }
- /* Too bad - revert to non-atomic kmap */
- kaddr = kmap(page);
- to = kaddr + offset;
- left = __copy_from_user(to, buf, copy);
- copy -= left;
- skip += copy;
- to += copy;
- bytes -= copy;
- while (unlikely(!left && bytes)) {
- iov++;
- buf = iov->iov_base;
- copy = min(bytes, iov->iov_len);
- left = __copy_from_user(to, buf, copy);
- copy -= left;
- skip = copy;
- to += copy;
- bytes -= copy;
- }
- kunmap(page);
-done:
- if (skip == iov->iov_len) {
- iov++;
- skip = 0;
- }
- i->count -= wanted - bytes;
- i->nr_segs -= iov - i->iov;
- i->iov = iov;
- i->iov_offset = skip;
- return wanted - bytes;
-}
-
-/*
- * Fault in the first iovec of the given iov_iter, to a maximum length
- * of bytes. Returns 0 on success, or non-zero if the memory could not be
- * accessed (ie. because it is an invalid address).
- *
- * writev-intensive code may want this to prefault several iovecs -- that
- * would be possible (callers must not rely on the fact that _only_ the
- * first iovec will be faulted with the current implementation).
- */
-int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
-{
- if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
- char __user *buf = i->iov->iov_base + i->iov_offset;
- bytes = min(bytes, i->iov->iov_len - i->iov_offset);
- return fault_in_pages_readable(buf, bytes);
- }
- return 0;
-}
-EXPORT_SYMBOL(iov_iter_fault_in_readable);
-
-void iov_iter_init(struct iov_iter *i, int direction,
- const struct iovec *iov, unsigned long nr_segs,
- size_t count)
-{
- /* It will get better. Eventually... */
- if (segment_eq(get_fs(), KERNEL_DS)) {
- direction |= ITER_KVEC;
- i->type = direction;
- i->kvec = (struct kvec *)iov;
- } else {
- i->type = direction;
- i->iov = iov;
- }
- i->nr_segs = nr_segs;
- i->iov_offset = 0;
- i->count = count;
-}
-EXPORT_SYMBOL(iov_iter_init);
-
-static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
-{
- char *from = kmap_atomic(page);
- memcpy(to, from + offset, len);
- kunmap_atomic(from);
-}
-
-static void memcpy_to_page(struct page *page, size_t offset, char *from, size_t len)
-{
- char *to = kmap_atomic(page);
- memcpy(to + offset, from, len);
- kunmap_atomic(to);
-}
-
-static void memzero_page(struct page *page, size_t offset, size_t len)
-{
- char *addr = kmap_atomic(page);
- memset(addr + offset, 0, len);
- kunmap_atomic(addr);
-}
-
-size_t copy_to_iter(void *addr, size_t bytes, struct iov_iter *i)
-{
- char *from = addr;
- if (unlikely(bytes > i->count))
- bytes = i->count;
-
- if (unlikely(!bytes))
- return 0;
-
- iterate_and_advance(i, bytes, v,
- __copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len,
- v.iov_len),
- memcpy_to_page(v.bv_page, v.bv_offset,
- (from += v.bv_len) - v.bv_len, v.bv_len),
- memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
- )
-
- return bytes;
-}
-EXPORT_SYMBOL(copy_to_iter);
-
-size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
-{
- char *to = addr;
- if (unlikely(bytes > i->count))
- bytes = i->count;
-
- if (unlikely(!bytes))
- return 0;
-
- iterate_and_advance(i, bytes, v,
- __copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base,
- v.iov_len),
- memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
- v.bv_offset, v.bv_len),
- memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
- )
-
- return bytes;
-}
-EXPORT_SYMBOL(copy_from_iter);
-
-size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
-{
- char *to = addr;
- if (unlikely(bytes > i->count))
- bytes = i->count;
-
- if (unlikely(!bytes))
- return 0;
-
- iterate_and_advance(i, bytes, v,
- __copy_from_user_nocache((to += v.iov_len) - v.iov_len,
- v.iov_base, v.iov_len),
- memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
- v.bv_offset, v.bv_len),
- memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
- )
-
- return bytes;
-}
-EXPORT_SYMBOL(copy_from_iter_nocache);
-
-size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
- struct iov_iter *i)
-{
- if (i->type & (ITER_BVEC|ITER_KVEC)) {
- void *kaddr = kmap_atomic(page);
- size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
- kunmap_atomic(kaddr);
- return wanted;
- } else
- return copy_page_to_iter_iovec(page, offset, bytes, i);
-}
-EXPORT_SYMBOL(copy_page_to_iter);
-
-size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
- struct iov_iter *i)
-{
- if (i->type & (ITER_BVEC|ITER_KVEC)) {
- void *kaddr = kmap_atomic(page);
- size_t wanted = copy_from_iter(kaddr + offset, bytes, i);
- kunmap_atomic(kaddr);
- return wanted;
- } else
- return copy_page_from_iter_iovec(page, offset, bytes, i);
-}
-EXPORT_SYMBOL(copy_page_from_iter);
-
-size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
-{
- if (unlikely(bytes > i->count))
- bytes = i->count;
-
- if (unlikely(!bytes))
- return 0;
-
- iterate_and_advance(i, bytes, v,
- __clear_user(v.iov_base, v.iov_len),
- memzero_page(v.bv_page, v.bv_offset, v.bv_len),
- memset(v.iov_base, 0, v.iov_len)
- )
-
- return bytes;
-}
-EXPORT_SYMBOL(iov_iter_zero);
-
-size_t iov_iter_copy_from_user_atomic(struct page *page,
- struct iov_iter *i, unsigned long offset, size_t bytes)
-{
- char *kaddr = kmap_atomic(page), *p = kaddr + offset;
- iterate_all_kinds(i, bytes, v,
- __copy_from_user_inatomic((p += v.iov_len) - v.iov_len,
- v.iov_base, v.iov_len),
- memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
- v.bv_offset, v.bv_len),
- memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
- )
- kunmap_atomic(kaddr);
- return bytes;
-}
-EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
-
-void iov_iter_advance(struct iov_iter *i, size_t size)
-{
- iterate_and_advance(i, size, v, 0, 0, 0)
-}
-EXPORT_SYMBOL(iov_iter_advance);
-
-/*
- * Return the count of just the current iov_iter segment.
- */
-size_t iov_iter_single_seg_count(const struct iov_iter *i)
-{
- if (i->nr_segs == 1)
- return i->count;
- else if (i->type & ITER_BVEC)
- return min(i->count, i->bvec->bv_len - i->iov_offset);
- else
- return min(i->count, i->iov->iov_len - i->iov_offset);
-}
-EXPORT_SYMBOL(iov_iter_single_seg_count);
-
-void iov_iter_kvec(struct iov_iter *i, int direction,
- const struct kvec *kvec, unsigned long nr_segs,
- size_t count)
-{
- BUG_ON(!(direction & ITER_KVEC));
- i->type = direction;
- i->kvec = kvec;
- i->nr_segs = nr_segs;
- i->iov_offset = 0;
- i->count = count;
-}
-EXPORT_SYMBOL(iov_iter_kvec);
-
-void iov_iter_bvec(struct iov_iter *i, int direction,
- const struct bio_vec *bvec, unsigned long nr_segs,
- size_t count)
-{
- BUG_ON(!(direction & ITER_BVEC));
- i->type = direction;
- i->bvec = bvec;
- i->nr_segs = nr_segs;
- i->iov_offset = 0;
- i->count = count;
-}
-EXPORT_SYMBOL(iov_iter_bvec);
-
-unsigned long iov_iter_alignment(const struct iov_iter *i)
-{
- unsigned long res = 0;
- size_t size = i->count;
-
- if (!size)
- return 0;
-
- iterate_all_kinds(i, size, v,
- (res |= (unsigned long)v.iov_base | v.iov_len, 0),
- res |= v.bv_offset | v.bv_len,
- res |= (unsigned long)v.iov_base | v.iov_len
- )
- return res;
-}
-EXPORT_SYMBOL(iov_iter_alignment);
-
-ssize_t iov_iter_get_pages(struct iov_iter *i,
- struct page **pages, size_t maxsize, unsigned maxpages,
- size_t *start)
-{
- if (maxsize > i->count)
- maxsize = i->count;
-
- if (!maxsize)
- return 0;
-
- iterate_all_kinds(i, maxsize, v, ({
- unsigned long addr = (unsigned long)v.iov_base;
- size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
- int n;
- int res;
-
- if (len > maxpages * PAGE_SIZE)
- len = maxpages * PAGE_SIZE;
- addr &= ~(PAGE_SIZE - 1);
- n = DIV_ROUND_UP(len, PAGE_SIZE);
- res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages);
- if (unlikely(res < 0))
- return res;
- return (res == n ? len : res * PAGE_SIZE) - *start;
- 0;}),({
- /* can't be more than PAGE_SIZE */
- *start = v.bv_offset;
- get_page(*pages = v.bv_page);
- return v.bv_len;
- }),({
- return -EFAULT;
- })
- )
- return 0;
-}
-EXPORT_SYMBOL(iov_iter_get_pages);
-
-static struct page **get_pages_array(size_t n)
-{
- struct page **p = kmalloc(n * sizeof(struct page *), GFP_KERNEL);
- if (!p)
- p = vmalloc(n * sizeof(struct page *));
- return p;
-}
-
-ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
- struct page ***pages, size_t maxsize,
- size_t *start)
-{
- struct page **p;
-
- if (maxsize > i->count)
- maxsize = i->count;
-
- if (!maxsize)
- return 0;
-
- iterate_all_kinds(i, maxsize, v, ({
- unsigned long addr = (unsigned long)v.iov_base;
- size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
- int n;
- int res;
-
- addr &= ~(PAGE_SIZE - 1);
- n = DIV_ROUND_UP(len, PAGE_SIZE);
- p = get_pages_array(n);
- if (!p)
- return -ENOMEM;
- res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p);
- if (unlikely(res < 0)) {
- kvfree(p);
- return res;
- }
- *pages = p;
- return (res == n ? len : res * PAGE_SIZE) - *start;
- 0;}),({
- /* can't be more than PAGE_SIZE */
- *start = v.bv_offset;
- *pages = p = get_pages_array(1);
- if (!p)
- return -ENOMEM;
- get_page(*p = v.bv_page);
- return v.bv_len;
- }),({
- return -EFAULT;
- })
- )
- return 0;
-}
-EXPORT_SYMBOL(iov_iter_get_pages_alloc);
-
-size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
- struct iov_iter *i)
-{
- char *to = addr;
- __wsum sum, next;
- size_t off = 0;
- if (unlikely(bytes > i->count))
- bytes = i->count;
-
- if (unlikely(!bytes))
- return 0;
-
- sum = *csum;
- iterate_and_advance(i, bytes, v, ({
- int err = 0;
- next = csum_and_copy_from_user(v.iov_base,
- (to += v.iov_len) - v.iov_len,
- v.iov_len, 0, &err);
- if (!err) {
- sum = csum_block_add(sum, next, off);
- off += v.iov_len;
- }
- err ? v.iov_len : 0;
- }), ({
- char *p = kmap_atomic(v.bv_page);
- next = csum_partial_copy_nocheck(p + v.bv_offset,
- (to += v.bv_len) - v.bv_len,
- v.bv_len, 0);
- kunmap_atomic(p);
- sum = csum_block_add(sum, next, off);
- off += v.bv_len;
- }),({
- next = csum_partial_copy_nocheck(v.iov_base,
- (to += v.iov_len) - v.iov_len,
- v.iov_len, 0);
- sum = csum_block_add(sum, next, off);
- off += v.iov_len;
- })
- )
- *csum = sum;
- return bytes;
-}
-EXPORT_SYMBOL(csum_and_copy_from_iter);
-
-size_t csum_and_copy_to_iter(void *addr, size_t bytes, __wsum *csum,
- struct iov_iter *i)
-{
- char *from = addr;
- __wsum sum, next;
- size_t off = 0;
- if (unlikely(bytes > i->count))
- bytes = i->count;
-
- if (unlikely(!bytes))
- return 0;
-
- sum = *csum;
- iterate_and_advance(i, bytes, v, ({
- int err = 0;
- next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
- v.iov_base,
- v.iov_len, 0, &err);
- if (!err) {
- sum = csum_block_add(sum, next, off);
- off += v.iov_len;
- }
- err ? v.iov_len : 0;
- }), ({
- char *p = kmap_atomic(v.bv_page);
- next = csum_partial_copy_nocheck((from += v.bv_len) - v.bv_len,
- p + v.bv_offset,
- v.bv_len, 0);
- kunmap_atomic(p);
- sum = csum_block_add(sum, next, off);
- off += v.bv_len;
- }),({
- next = csum_partial_copy_nocheck((from += v.iov_len) - v.iov_len,
- v.iov_base,
- v.iov_len, 0);
- sum = csum_block_add(sum, next, off);
- off += v.iov_len;
- })
- )
- *csum = sum;
- return bytes;
-}
-EXPORT_SYMBOL(csum_and_copy_to_iter);
-
-int iov_iter_npages(const struct iov_iter *i, int maxpages)
-{
- size_t size = i->count;
- int npages = 0;
-
- if (!size)
- return 0;
-
- iterate_all_kinds(i, size, v, ({
- unsigned long p = (unsigned long)v.iov_base;
- npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
- - p / PAGE_SIZE;
- if (npages >= maxpages)
- return maxpages;
- 0;}),({
- npages++;
- if (npages >= maxpages)
- return maxpages;
- }),({
- unsigned long p = (unsigned long)v.iov_base;
- npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
- - p / PAGE_SIZE;
- if (npages >= maxpages)
- return maxpages;
- })
- )
- return npages;
-}
-EXPORT_SYMBOL(iov_iter_npages);
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 78fee632a7ee..936d81661c47 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -29,6 +29,7 @@
#include <linux/stacktrace.h>
#include <linux/string.h>
#include <linux/types.h>
+#include <linux/vmalloc.h>
#include <linux/kasan.h>
#include "kasan.h"
@@ -414,12 +415,19 @@ int kasan_module_alloc(void *addr, size_t size)
GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
PAGE_KERNEL, VM_NO_GUARD, NUMA_NO_NODE,
__builtin_return_address(0));
- return ret ? 0 : -ENOMEM;
+
+ if (ret) {
+ find_vm_area(addr)->flags |= VM_KASAN;
+ return 0;
+ }
+
+ return -ENOMEM;
}
-void kasan_module_free(void *addr)
+void kasan_free_shadow(const struct vm_struct *vm)
{
- vfree(kasan_mem_to_shadow(addr));
+ if (vm->flags & VM_KASAN)
+ vfree(kasan_mem_to_shadow(vm->addr));
}
static void register_global(struct kasan_global *global)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d18d3a6e7337..b34ef4a32a3b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5232,7 +5232,9 @@ static void mem_cgroup_bind(struct cgroup_subsys_state *root_css)
* on for the root memcg is enough.
*/
if (cgroup_on_dfl(root_css->cgroup))
- mem_cgroup_from_css(root_css)->use_hierarchy = true;
+ root_mem_cgroup->use_hierarchy = true;
+ else
+ root_mem_cgroup->use_hierarchy = false;
}
static u64 memory_current_read(struct cgroup_subsys_state *css,
@@ -5247,7 +5249,7 @@ static int memory_low_show(struct seq_file *m, void *v)
unsigned long low = ACCESS_ONCE(memcg->low);
if (low == PAGE_COUNTER_MAX)
- seq_puts(m, "infinity\n");
+ seq_puts(m, "max\n");
else
seq_printf(m, "%llu\n", (u64)low * PAGE_SIZE);
@@ -5262,7 +5264,7 @@ static ssize_t memory_low_write(struct kernfs_open_file *of,
int err;
buf = strstrip(buf);
- err = page_counter_memparse(buf, "infinity", &low);
+ err = page_counter_memparse(buf, "max", &low);
if (err)
return err;
@@ -5277,7 +5279,7 @@ static int memory_high_show(struct seq_file *m, void *v)
unsigned long high = ACCESS_ONCE(memcg->high);
if (high == PAGE_COUNTER_MAX)
- seq_puts(m, "infinity\n");
+ seq_puts(m, "max\n");
else
seq_printf(m, "%llu\n", (u64)high * PAGE_SIZE);
@@ -5292,7 +5294,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
int err;
buf = strstrip(buf);
- err = page_counter_memparse(buf, "infinity", &high);
+ err = page_counter_memparse(buf, "max", &high);
if (err)
return err;
@@ -5307,7 +5309,7 @@ static int memory_max_show(struct seq_file *m, void *v)
unsigned long max = ACCESS_ONCE(memcg->memory.limit);
if (max == PAGE_COUNTER_MAX)
- seq_puts(m, "infinity\n");
+ seq_puts(m, "max\n");
else
seq_printf(m, "%llu\n", (u64)max * PAGE_SIZE);
@@ -5322,7 +5324,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
int err;
buf = strstrip(buf);
- err = page_counter_memparse(buf, "infinity", &max);
+ err = page_counter_memparse(buf, "max", &max);
if (err)
return err;
@@ -5426,7 +5428,7 @@ bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg)
if (memcg == root_mem_cgroup)
return false;
- if (page_counter_read(&memcg->memory) > memcg->low)
+ if (page_counter_read(&memcg->memory) >= memcg->low)
return false;
while (memcg != root) {
@@ -5435,7 +5437,7 @@ bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg)
if (memcg == root_mem_cgroup)
break;
- if (page_counter_read(&memcg->memory) > memcg->low)
+ if (page_counter_read(&memcg->memory) >= memcg->low)
return false;
}
return true;
diff --git a/mm/memory.c b/mm/memory.c
index 8068893697bb..97839f5c8c30 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3035,6 +3035,7 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
int last_cpupid;
int target_nid;
bool migrated = false;
+ bool was_writable = pte_write(pte);
int flags = 0;
/* A PROT_NONE fault should not end up here */
@@ -3059,6 +3060,8 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
/* Make it present again */
pte = pte_modify(pte, vma->vm_page_prot);
pte = pte_mkyoung(pte);
+ if (was_writable)
+ pte = pte_mkwrite(pte);
set_pte_at(mm, addr, ptep, pte);
update_mmu_cache(vma, addr, ptep);
@@ -3069,11 +3072,14 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
}
/*
- * Avoid grouping on DSO/COW pages in specific and RO pages
- * in general, RO pages shouldn't hurt as much anyway since
- * they can be in shared cache state.
+ * Avoid grouping on RO pages in general. RO pages shouldn't hurt as
+ * much anyway since they can be in shared cache state. This misses
+ * the case where a mapping is writable but the process never writes
+ * to it but pte_write gets cleared during protection updates and
+ * pte_dirty has unpredictable behaviour between PTE scan updates,
+ * background writeback, dirty balancing and application behaviour.
*/
- if (!pte_write(pte))
+ if (!(vma->vm_flags & VM_WRITE))
flags |= TNF_NO_GROUP;
/*
@@ -3097,7 +3103,8 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (migrated) {
page_nid = target_nid;
flags |= TNF_MIGRATED;
- }
+ } else
+ flags |= TNF_MIGRATE_FAIL;
out:
if (page_nid != -1)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 9fab10795bea..65842d688b7c 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1092,6 +1092,10 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
return NULL;
arch_refresh_nodedata(nid, pgdat);
+ } else {
+ /* Reset the nr_zones and classzone_idx to 0 before reuse */
+ pgdat->nr_zones = 0;
+ pgdat->classzone_idx = 0;
}
/* we can use NODE_DATA(nid) from here */
@@ -1977,15 +1981,6 @@ void try_offline_node(int nid)
if (is_vmalloc_addr(zone->wait_table))
vfree(zone->wait_table);
}
-
- /*
- * Since there is no way to guarentee the address of pgdat/zone is not
- * on stack of any kernel threads or used by other kernel objects
- * without reference counting or other symchronizing method, do not
- * reset node_data and free pgdat here. Just reset it to 0 and reuse
- * the memory when the node is online again.
- */
- memset(pgdat, 0, sizeof(*pgdat));
}
EXPORT_SYMBOL(try_offline_node);
diff --git a/mm/mlock.c b/mm/mlock.c
index 73cf0987088c..8a54cd214925 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -26,10 +26,10 @@
int can_do_mlock(void)
{
- if (capable(CAP_IPC_LOCK))
- return 1;
if (rlimit(RLIMIT_MEMLOCK) != 0)
return 1;
+ if (capable(CAP_IPC_LOCK))
+ return 1;
return 0;
}
EXPORT_SYMBOL(can_do_mlock);
diff --git a/mm/mmap.c b/mm/mmap.c
index da9990acc08b..9ec50a368634 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -774,10 +774,8 @@ again: remove_next = 1 + (end > next->vm_end);
importer->anon_vma = exporter->anon_vma;
error = anon_vma_clone(importer, exporter);
- if (error) {
- importer->anon_vma = NULL;
+ if (error)
return error;
- }
}
}
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 44727811bf4c..88584838e704 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -75,6 +75,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
oldpte = *pte;
if (pte_present(oldpte)) {
pte_t ptent;
+ bool preserve_write = prot_numa && pte_write(oldpte);
/*
* Avoid trapping faults against the zero or KSM
@@ -94,6 +95,8 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
ptent = ptep_modify_prot_start(mm, addr, pte);
ptent = pte_modify(ptent, newprot);
+ if (preserve_write)
+ ptent = pte_mkwrite(ptent);
/* Avoid taking write faults for known dirty pages */
if (dirty_accountable && pte_dirty(ptent) &&
diff --git a/mm/mremap.c b/mm/mremap.c
index 57dadc025c64..2dc44b1cb1df 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -286,8 +286,14 @@ static unsigned long move_vma(struct vm_area_struct *vma,
old_len = new_len;
old_addr = new_addr;
new_addr = -ENOMEM;
- } else if (vma->vm_file && vma->vm_file->f_op->mremap)
- vma->vm_file->f_op->mremap(vma->vm_file, new_vma);
+ } else if (vma->vm_file && vma->vm_file->f_op->mremap) {
+ err = vma->vm_file->f_op->mremap(vma->vm_file, new_vma);
+ if (err < 0) {
+ move_page_tables(new_vma, new_addr, vma, old_addr,
+ moved_len, true);
+ return err;
+ }
+ }
/* Conceal VM_ACCOUNT so old reservation is not undone */
if (vm_flags & VM_ACCOUNT) {
diff --git a/mm/nommu.c b/mm/nommu.c
index 7296360fc057..3fba2dc97c44 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -62,6 +62,7 @@ void *high_memory;
EXPORT_SYMBOL(high_memory);
struct page *mem_map;
unsigned long max_mapnr;
+EXPORT_SYMBOL(max_mapnr);
unsigned long highest_memmap_pfn;
struct percpu_counter vm_committed_as;
int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
@@ -1213,11 +1214,9 @@ static int do_mmap_private(struct vm_area_struct *vma,
if (sysctl_nr_trim_pages && total - point >= sysctl_nr_trim_pages) {
total = point;
kdebug("try to alloc exact %lu pages", total);
- base = alloc_pages_exact(len, GFP_KERNEL);
- } else {
- base = (void *)__get_free_pages(GFP_KERNEL, order);
}
+ base = alloc_pages_exact(total << PAGE_SHIFT, GFP_KERNEL);
if (!base)
goto enomem;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 45e187b2d971..644bcb665773 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -857,8 +857,11 @@ static void bdi_update_write_bandwidth(struct backing_dev_info *bdi,
* bw * elapsed + write_bandwidth * (period - elapsed)
* write_bandwidth = ---------------------------------------------------
* period
+ *
+ * @written may have decreased due to account_page_redirty().
+ * Avoid underflowing @bw calculation.
*/
- bw = written - bdi->written_stamp;
+ bw = written - min(written, bdi->written_stamp);
bw *= HZ;
if (unlikely(elapsed > period)) {
do_div(bw, elapsed);
@@ -922,7 +925,7 @@ static void global_update_bandwidth(unsigned long thresh,
unsigned long now)
{
static DEFINE_SPINLOCK(dirty_lock);
- static unsigned long update_time;
+ static unsigned long update_time = INITIAL_JIFFIES;
/*
* check locklessly first to optimize away locking for the most time
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a47f0b229a1a..40e29429e7b0 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2353,8 +2353,15 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
if (ac->high_zoneidx < ZONE_NORMAL)
goto out;
/* The OOM killer does not compensate for light reclaim */
- if (!(gfp_mask & __GFP_FS))
+ if (!(gfp_mask & __GFP_FS)) {
+ /*
+ * XXX: Page reclaim didn't yield anything,
+ * and the OOM killer can't be invoked, but
+ * keep looping as per should_alloc_retry().
+ */
+ *did_some_progress = 1;
goto out;
+ }
/*
* GFP_THISNODE contains __GFP_NORETRY and we never hit this.
* Sanity check for bare calls of __GFP_THISNODE, not real OOM.
@@ -2366,7 +2373,8 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
goto out;
}
/* Exhausted what can be done so it's blamo time */
- if (out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false))
+ if (out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false)
+ || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL))
*did_some_progress = 1;
out:
oom_zonelist_unlock(ac->zonelist, gfp_mask);
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 72f5ac381ab3..755a42c76eb4 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -103,6 +103,7 @@ void unset_migratetype_isolate(struct page *page, unsigned migratetype)
if (!is_migrate_isolate_page(buddy)) {
__isolate_free_page(page, order);
+ kernel_map_pages(page, (1 << order), 1);
set_page_refcounted(page);
isolated_page = page;
}
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 75c1f2878519..29f2f8b853ae 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -265,8 +265,15 @@ int walk_page_range(unsigned long start, unsigned long end,
vma = vma->vm_next;
err = walk_page_test(start, next, walk);
- if (err > 0)
+ if (err > 0) {
+ /*
+ * positive return values are purely for
+ * controlling the pagewalk, so should never
+ * be passed to the callers.
+ */
+ err = 0;
continue;
+ }
if (err < 0)
break;
}
diff --git a/mm/rmap.c b/mm/rmap.c
index 5e3e09081164..c161a14b6a8f 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -287,6 +287,13 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
return 0;
enomem_failure:
+ /*
+ * dst->anon_vma is dropped here otherwise its degree can be incorrectly
+ * decremented in unlink_anon_vmas().
+ * We can safely do this because callers of anon_vma_clone() don't care
+ * about dst->anon_vma if anon_vma_clone() failed.
+ */
+ dst->anon_vma = NULL;
unlink_anon_vmas(dst);
return -ENOMEM;
}
diff --git a/mm/shmem.c b/mm/shmem.c
index 2f17cb5f00a4..cf2d0ca010bc 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1455,6 +1455,9 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
bool shmem_mapping(struct address_space *mapping)
{
+ if (!mapping->host)
+ return false;
+
return mapping->host->i_sb->s_op == &shmem_ops;
}
diff --git a/mm/slub.c b/mm/slub.c
index 6832c4eab104..82c473780c91 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2449,7 +2449,8 @@ redo:
do {
tid = this_cpu_read(s->cpu_slab->tid);
c = raw_cpu_ptr(s->cpu_slab);
- } while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid));
+ } while (IS_ENABLED(CONFIG_PREEMPT) &&
+ unlikely(tid != READ_ONCE(c->tid)));
/*
* Irqless object alloc/free algorithm used here depends on sequence
@@ -2718,7 +2719,8 @@ redo:
do {
tid = this_cpu_read(s->cpu_slab->tid);
c = raw_cpu_ptr(s->cpu_slab);
- } while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid));
+ } while (IS_ENABLED(CONFIG_PREEMPT) &&
+ unlikely(tid != READ_ONCE(c->tid)));
/* Same with comment on barrier() in slab_alloc_node() */
barrier();
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 35b25e1340ca..49abccf29a29 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1418,6 +1418,7 @@ struct vm_struct *remove_vm_area(const void *addr)
spin_unlock(&vmap_area_lock);
vmap_debug_free_range(va->va_start, va->va_end);
+ kasan_free_shadow(vm);
free_unmap_vmap_area(va);
vm->size -= PAGE_SIZE;