summaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/Makefile7
-rw-r--r--mm/dmapool.c2
-rw-r--r--mm/filemap.c2
-rw-r--r--mm/gup.c4
-rw-r--r--mm/huge_memory.c7
-rw-r--r--mm/iov_iter.c14
-rw-r--r--mm/memblock.c4
-rw-r--r--mm/memcontrol.c36
-rw-r--r--mm/memory.c4
-rw-r--r--mm/migrate.c5
-rw-r--r--mm/mmap.c16
-rw-r--r--mm/mmu_notifier.c5
-rw-r--r--mm/nobootmem.c2
-rw-r--r--mm/page_alloc.c7
-rw-r--r--mm/percpu-vm.c22
-rw-r--r--mm/percpu.c2
-rw-r--r--mm/rmap.c6
-rw-r--r--mm/shmem.c4
-rw-r--r--mm/slab.c15
19 files changed, 113 insertions, 51 deletions
diff --git a/mm/Makefile b/mm/Makefile
index 632ae77e6070..fe7a053c0f45 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -3,7 +3,7 @@
#
mmu-y := nommu.o
-mmu-$(CONFIG_MMU) := fremap.o gup.o highmem.o madvise.o memory.o mincore.o \
+mmu-$(CONFIG_MMU) := fremap.o gup.o highmem.o memory.o mincore.o \
mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
vmalloc.o pagewalk.o pgtable-generic.o
@@ -11,7 +11,7 @@ ifdef CONFIG_CROSS_MEMORY_ATTACH
mmu-$(CONFIG_MMU) += process_vm_access.o
endif
-obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
+obj-y := filemap.o mempool.o oom_kill.o \
maccess.o page_alloc.o page-writeback.o \
readahead.o swap.o truncate.o vmscan.o shmem.o \
util.o mmzone.o vmstat.o backing-dev.o \
@@ -28,6 +28,9 @@ else
obj-y += bootmem.o
endif
+ifdef CONFIG_MMU
+ obj-$(CONFIG_ADVISE_SYSCALLS) += fadvise.o madvise.o
+endif
obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o
diff --git a/mm/dmapool.c b/mm/dmapool.c
index 306baa594f95..ba8019b063e1 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -176,7 +176,7 @@ struct dma_pool *dma_pool_create(const char *name, struct device *dev,
if (list_empty(&dev->dma_pools) &&
device_create_file(dev, &dev_attr_pools)) {
kfree(retval);
- return NULL;
+ retval = NULL;
} else
list_add(&retval->pools, &dev->dma_pools);
mutex_unlock(&pools_lock);
diff --git a/mm/filemap.c b/mm/filemap.c
index b9b1413080be..0ab0a3ea5721 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -736,7 +736,7 @@ EXPORT_SYMBOL_GPL(add_page_wait_queue);
*
* Unlocks the page and wakes up sleepers in ___wait_on_page_locked().
* Also wakes sleepers in wait_on_page_writeback() because the wakeup
- * mechananism between PageLocked pages and PageWriteback pages is shared.
+ * mechanism between PageLocked pages and PageWriteback pages is shared.
* But that's OK - sleepers in wait_on_page_writeback() just go back to sleep.
*
* The mb is necessary to enforce ordering between the clear_bit and the read
diff --git a/mm/gup.c b/mm/gup.c
index 91d044b1600d..af7ea3e0826b 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -281,6 +281,10 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
fault_flags |= FAULT_FLAG_ALLOW_RETRY;
if (*flags & FOLL_NOWAIT)
fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT;
+ if (*flags & FOLL_TRIED) {
+ VM_WARN_ON_ONCE(fault_flags & FAULT_FLAG_ALLOW_RETRY);
+ fault_flags |= FAULT_FLAG_TRIED;
+ }
ret = handle_mm_fault(mm, vma, address, fault_flags);
if (ret & VM_FAULT_ERROR) {
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d9a21d06b862..f8ffd9412ec5 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1795,14 +1795,17 @@ static int __split_huge_page_map(struct page *page,
for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
pte_t *pte, entry;
BUG_ON(PageCompound(page+i));
+ /*
+ * Note that pmd_numa is not transferred deliberately
+ * to avoid any possibility that pte_numa leaks to
+ * a PROT_NONE VMA by accident.
+ */
entry = mk_pte(page + i, vma->vm_page_prot);
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
if (!pmd_write(*pmd))
entry = pte_wrprotect(entry);
if (!pmd_young(*pmd))
entry = pte_mkold(entry);
- if (pmd_numa(*pmd))
- entry = pte_mknuma(entry);
pte = pte_offset_map(&_pmd, haddr);
BUG_ON(!pte_none(*pte));
set_pte_at(mm, haddr, pte, entry);
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index ab88dc0ea1d3..9a09f2034fcc 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -310,7 +310,7 @@ void iov_iter_init(struct iov_iter *i, int direction,
EXPORT_SYMBOL(iov_iter_init);
static ssize_t get_pages_iovec(struct iov_iter *i,
- struct page **pages, unsigned maxpages,
+ struct page **pages, size_t maxsize, unsigned maxpages,
size_t *start)
{
size_t offset = i->iov_offset;
@@ -323,6 +323,8 @@ static ssize_t get_pages_iovec(struct iov_iter *i,
len = iov->iov_len - offset;
if (len > i->count)
len = i->count;
+ if (len > maxsize)
+ len = maxsize;
addr = (unsigned long)iov->iov_base + offset;
len += *start = addr & (PAGE_SIZE - 1);
if (len > maxpages * PAGE_SIZE)
@@ -588,13 +590,15 @@ static unsigned long alignment_bvec(const struct iov_iter *i)
}
static ssize_t get_pages_bvec(struct iov_iter *i,
- struct page **pages, unsigned maxpages,
+ struct page **pages, size_t maxsize, unsigned maxpages,
size_t *start)
{
const struct bio_vec *bvec = i->bvec;
size_t len = bvec->bv_len - i->iov_offset;
if (len > i->count)
len = i->count;
+ if (len > maxsize)
+ len = maxsize;
/* can't be more than PAGE_SIZE */
*start = bvec->bv_offset + i->iov_offset;
@@ -711,13 +715,13 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)
EXPORT_SYMBOL(iov_iter_alignment);
ssize_t iov_iter_get_pages(struct iov_iter *i,
- struct page **pages, unsigned maxpages,
+ struct page **pages, size_t maxsize, unsigned maxpages,
size_t *start)
{
if (i->type & ITER_BVEC)
- return get_pages_bvec(i, pages, maxpages, start);
+ return get_pages_bvec(i, pages, maxsize, maxpages, start);
else
- return get_pages_iovec(i, pages, maxpages, start);
+ return get_pages_iovec(i, pages, maxsize, maxpages, start);
}
EXPORT_SYMBOL(iov_iter_get_pages);
diff --git a/mm/memblock.c b/mm/memblock.c
index 70fad0c0dafb..6ecb0d937fb5 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -816,6 +816,10 @@ void __init_memblock __next_mem_range(u64 *idx, int nid,
if (nid != NUMA_NO_NODE && nid != m_nid)
continue;
+ /* skip hotpluggable memory regions if needed */
+ if (movable_node_is_enabled() && memblock_is_hotpluggable(m))
+ continue;
+
if (!type_b) {
if (out_start)
*out_start = m_start;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 085dc6d2f876..28928ce9b07f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -292,6 +292,9 @@ struct mem_cgroup {
/* vmpressure notifications */
struct vmpressure vmpressure;
+ /* css_online() has been completed */
+ int initialized;
+
/*
* the counter to account for mem+swap usage.
*/
@@ -1099,10 +1102,21 @@ skip_node:
* skipping css reference should be safe.
*/
if (next_css) {
- if ((next_css == &root->css) ||
- ((next_css->flags & CSS_ONLINE) &&
- css_tryget_online(next_css)))
- return mem_cgroup_from_css(next_css);
+ struct mem_cgroup *memcg = mem_cgroup_from_css(next_css);
+
+ if (next_css == &root->css)
+ return memcg;
+
+ if (css_tryget_online(next_css)) {
+ /*
+ * Make sure the memcg is initialized:
+ * mem_cgroup_css_online() orders the the
+ * initialization against setting the flag.
+ */
+ if (smp_load_acquire(&memcg->initialized))
+ return memcg;
+ css_put(next_css);
+ }
prev_css = next_css;
goto skip_node;
@@ -5549,6 +5563,7 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
struct mem_cgroup *parent = mem_cgroup_from_css(css->parent);
+ int ret;
if (css->id > MEM_CGROUP_ID_MAX)
return -ENOSPC;
@@ -5585,7 +5600,18 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
}
mutex_unlock(&memcg_create_mutex);
- return memcg_init_kmem(memcg, &memory_cgrp_subsys);
+ ret = memcg_init_kmem(memcg, &memory_cgrp_subsys);
+ if (ret)
+ return ret;
+
+ /*
+ * Make sure the memcg is initialized: mem_cgroup_iter()
+ * orders reading memcg->initialized against its callers
+ * reading the memcg members.
+ */
+ smp_store_release(&memcg->initialized, 1);
+
+ return 0;
}
/*
diff --git a/mm/memory.c b/mm/memory.c
index adeac306610f..e229970e4223 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -118,6 +118,8 @@ __setup("norandmaps", disable_randmaps);
unsigned long zero_pfn __read_mostly;
unsigned long highest_memmap_pfn __read_mostly;
+EXPORT_SYMBOL(zero_pfn);
+
/*
* CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
*/
@@ -1125,7 +1127,7 @@ again:
addr) != page->index) {
pte_t ptfile = pgoff_to_pte(page->index);
if (pte_soft_dirty(ptent))
- pte_file_mksoft_dirty(ptfile);
+ ptfile = pte_file_mksoft_dirty(ptfile);
set_pte_at(mm, addr, pte, ptfile);
}
if (PageAnon(page))
diff --git a/mm/migrate.c b/mm/migrate.c
index f78ec9bd454d..2740360cd216 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -146,8 +146,11 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
if (pte_swp_soft_dirty(*ptep))
pte = pte_mksoft_dirty(pte);
+
+ /* Recheck VMA as permissions can change since migration started */
if (is_write_migration_entry(entry))
- pte = pte_mkwrite(pte);
+ pte = maybe_mkwrite(pte, vma);
+
#ifdef CONFIG_HUGETLB_PAGE
if (PageHuge(new)) {
pte = pte_mkhuge(pte);
diff --git a/mm/mmap.c b/mm/mmap.c
index c1f2ea4a0b99..c0a3637cdb64 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -369,20 +369,20 @@ static int browse_rb(struct rb_root *root)
struct vm_area_struct *vma;
vma = rb_entry(nd, struct vm_area_struct, vm_rb);
if (vma->vm_start < prev) {
- pr_info("vm_start %lx prev %lx\n", vma->vm_start, prev);
+ pr_emerg("vm_start %lx prev %lx\n", vma->vm_start, prev);
bug = 1;
}
if (vma->vm_start < pend) {
- pr_info("vm_start %lx pend %lx\n", vma->vm_start, pend);
+ pr_emerg("vm_start %lx pend %lx\n", vma->vm_start, pend);
bug = 1;
}
if (vma->vm_start > vma->vm_end) {
- pr_info("vm_end %lx < vm_start %lx\n",
+ pr_emerg("vm_end %lx < vm_start %lx\n",
vma->vm_end, vma->vm_start);
bug = 1;
}
if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
- pr_info("free gap %lx, correct %lx\n",
+ pr_emerg("free gap %lx, correct %lx\n",
vma->rb_subtree_gap,
vma_compute_subtree_gap(vma));
bug = 1;
@@ -396,7 +396,7 @@ static int browse_rb(struct rb_root *root)
for (nd = pn; nd; nd = rb_prev(nd))
j++;
if (i != j) {
- pr_info("backwards %d, forwards %d\n", j, i);
+ pr_emerg("backwards %d, forwards %d\n", j, i);
bug = 1;
}
return bug ? -1 : i;
@@ -431,17 +431,17 @@ static void validate_mm(struct mm_struct *mm)
i++;
}
if (i != mm->map_count) {
- pr_info("map_count %d vm_next %d\n", mm->map_count, i);
+ pr_emerg("map_count %d vm_next %d\n", mm->map_count, i);
bug = 1;
}
if (highest_address != mm->highest_vm_end) {
- pr_info("mm->highest_vm_end %lx, found %lx\n",
+ pr_emerg("mm->highest_vm_end %lx, found %lx\n",
mm->highest_vm_end, highest_address);
bug = 1;
}
i = browse_rb(&mm->mm_rb);
if (i != mm->map_count) {
- pr_info("map_count %d rb %d\n", mm->map_count, i);
+ pr_emerg("map_count %d rb %d\n", mm->map_count, i);
bug = 1;
}
BUG_ON(bug);
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 950813b1eb36..2c8da9825fe3 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -107,7 +107,8 @@ void __mmu_notifier_release(struct mm_struct *mm)
* existed or not.
*/
int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
- unsigned long address)
+ unsigned long start,
+ unsigned long end)
{
struct mmu_notifier *mn;
int young = 0, id;
@@ -115,7 +116,7 @@ int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
id = srcu_read_lock(&srcu);
hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
if (mn->ops->clear_flush_young)
- young |= mn->ops->clear_flush_young(mn, mm, address);
+ young |= mn->ops->clear_flush_young(mn, mm, start, end);
}
srcu_read_unlock(&srcu, id);
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 7ed58602e71b..7c7ab32ee503 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -119,6 +119,8 @@ static unsigned long __init free_low_memory_core_early(void)
phys_addr_t start, end;
u64 i;
+ memblock_clear_hotplug(0, -1);
+
for_each_free_mem_range(i, NUMA_NO_NODE, &start, &end, NULL)
count += __free_memory_core(start, end);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 18cee0d4c8a2..eee961958021 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1612,7 +1612,7 @@ again:
}
__mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
- if (zone_page_state(zone, NR_ALLOC_BATCH) == 0 &&
+ if (atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]) <= 0 &&
!zone_is_fair_depleted(zone))
zone_set_flag(zone, ZONE_FAIR_DEPLETED);
@@ -5701,9 +5701,8 @@ static void __setup_per_zone_wmarks(void)
zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1);
__mod_zone_page_state(zone, NR_ALLOC_BATCH,
- high_wmark_pages(zone) -
- low_wmark_pages(zone) -
- zone_page_state(zone, NR_ALLOC_BATCH));
+ high_wmark_pages(zone) - low_wmark_pages(zone) -
+ atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
setup_zone_migrate_reserve(zone);
spin_unlock_irqrestore(&zone->lock, flags);
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index 3707c71ae4cd..51108165f829 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -108,7 +108,7 @@ static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
int page_start, int page_end)
{
const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD;
- unsigned int cpu;
+ unsigned int cpu, tcpu;
int i;
for_each_possible_cpu(cpu) {
@@ -116,14 +116,23 @@ static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
struct page **pagep = &pages[pcpu_page_idx(cpu, i)];
*pagep = alloc_pages_node(cpu_to_node(cpu), gfp, 0);
- if (!*pagep) {
- pcpu_free_pages(chunk, pages, populated,
- page_start, page_end);
- return -ENOMEM;
- }
+ if (!*pagep)
+ goto err;
}
}
return 0;
+
+err:
+ while (--i >= page_start)
+ __free_page(pages[pcpu_page_idx(cpu, i)]);
+
+ for_each_possible_cpu(tcpu) {
+ if (tcpu == cpu)
+ break;
+ for (i = page_start; i < page_end; i++)
+ __free_page(pages[pcpu_page_idx(tcpu, i)]);
+ }
+ return -ENOMEM;
}
/**
@@ -263,6 +272,7 @@ err:
__pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start),
page_end - page_start);
}
+ pcpu_post_unmap_tlb_flush(chunk, page_start, page_end);
return err;
}
diff --git a/mm/percpu.c b/mm/percpu.c
index 2139e30a4b44..da997f9800bd 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1932,6 +1932,8 @@ void __init setup_per_cpu_areas(void)
if (pcpu_setup_first_chunk(ai, fc) < 0)
panic("Failed to initialize percpu areas.");
+
+ pcpu_free_alloc_info(ai);
}
#endif /* CONFIG_SMP */
diff --git a/mm/rmap.c b/mm/rmap.c
index 3e8491c504f8..bc74e0012809 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1355,7 +1355,11 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
continue; /* don't unmap */
}
- if (ptep_clear_flush_young_notify(vma, address, pte))
+ /*
+ * No need for _notify because we're within an
+ * mmu_notifier_invalidate_range_ {start|end} scope.
+ */
+ if (ptep_clear_flush_young(vma, address, pte))
continue;
/* Nuke the page table entry. */
diff --git a/mm/shmem.c b/mm/shmem.c
index 0e5fb225007c..469f90d56051 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2367,8 +2367,10 @@ static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struc
if (new_dentry->d_inode) {
(void) shmem_unlink(new_dir, new_dentry);
- if (they_are_dirs)
+ if (they_are_dirs) {
+ drop_nlink(new_dentry->d_inode);
drop_nlink(old_dir);
+ }
} else if (they_are_dirs) {
drop_nlink(old_dir);
inc_nlink(new_dir);
diff --git a/mm/slab.c b/mm/slab.c
index a467b308c682..7c52b3890d25 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2124,7 +2124,8 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
int
__kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
{
- size_t left_over, freelist_size, ralign;
+ size_t left_over, freelist_size;
+ size_t ralign = BYTES_PER_WORD;
gfp_t gfp;
int err;
size_t size = cachep->size;
@@ -2157,14 +2158,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
size &= ~(BYTES_PER_WORD - 1);
}
- /*
- * Redzoning and user store require word alignment or possibly larger.
- * Note this will be overridden by architecture or caller mandated
- * alignment if either is greater than BYTES_PER_WORD.
- */
- if (flags & SLAB_STORE_USER)
- ralign = BYTES_PER_WORD;
-
if (flags & SLAB_RED_ZONE) {
ralign = REDZONE_ALIGN;
/* If redzoning, ensure that the second redzone is suitably
@@ -2994,7 +2987,7 @@ out:
#ifdef CONFIG_NUMA
/*
- * Try allocating on another node if PF_SPREAD_SLAB is a mempolicy is set.
+ * Try allocating on another node if PFA_SPREAD_SLAB is a mempolicy is set.
*
* If we are in_interrupt, then process context, including cpusets and
* mempolicy, may not apply and should not be used for allocation policy.
@@ -3226,7 +3219,7 @@ __do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
{
void *objp;
- if (current->mempolicy || unlikely(current->flags & PF_SPREAD_SLAB)) {
+ if (current->mempolicy || cpuset_do_slab_mem_spread()) {
objp = alternate_node_alloc(cache, flags);
if (objp)
goto out;