summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-11-06 08:10:54 +0100
committerLinus Torvalds <torvalds@linux-foundation.org>2015-11-06 08:10:54 +0100
commit2e3078af2c67730c479f1d183af5b367f5d95337 (patch)
treeb7881c6c9c479aadac345df7e18e3c0e10f0811e /include
parentvfs: clear remainder of 'full_fds_bits' in dup_fd() (diff)
parentselftests: vm: add tests for lock on fault (diff)
downloadlinux-2e3078af2c67730c479f1d183af5b367f5d95337.tar.xz
linux-2e3078af2c67730c479f1d183af5b367f5d95337.zip
Merge branch 'akpm' (patches from Andrew)
Merge patch-bomb from Andrew Morton: - inotify tweaks - some ocfs2 updates (many more are awaiting review) - various misc bits - kernel/watchdog.c updates - Some of mm. I have a huge number of MM patches this time and quite a lot of it is quite difficult and much will be held over to next time. * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (162 commits) selftests: vm: add tests for lock on fault mm: mlock: add mlock flags to enable VM_LOCKONFAULT usage mm: introduce VM_LOCKONFAULT mm: mlock: add new mlock system call mm: mlock: refactor mlock, munlock, and munlockall code kasan: always taint kernel on report mm, slub, kasan: enable user tracking by default with KASAN=y kasan: use IS_ALIGNED in memory_is_poisoned_8() kasan: Fix a type conversion error lib: test_kasan: add some testcases kasan: update reference to kasan prototype repo kasan: move KASAN_SANITIZE in arch/x86/boot/Makefile kasan: various fixes in documentation kasan: update log messages kasan: accurately determine the type of the bad access kasan: update reported bug types for kernel memory accesses kasan: update reported bug types for not user nor kernel memory accesses mm/kasan: prevent deadlock in kasan reporting mm/kasan: don't use kasan shadow pointer in generic functions mm/kasan: MODULE_VADDR is not available on all archs ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/compaction.h3
-rw-r--r--include/linux/compiler-gcc.h17
-rw-r--r--include/linux/compiler.h8
-rw-r--r--include/linux/cpuset.h4
-rw-r--r--include/linux/fs.h1
-rw-r--r--include/linux/hugetlb.h19
-rw-r--r--include/linux/memblock.h4
-rw-r--r--include/linux/memcontrol.h147
-rw-r--r--include/linux/mm.h11
-rw-r--r--include/linux/mm_types.h3
-rw-r--r--include/linux/mmzone.h3
-rw-r--r--include/linux/nmi.h1
-rw-r--r--include/linux/page-flags.h2
-rw-r--r--include/linux/page_counter.h6
-rw-r--r--include/linux/sched.h17
-rw-r--r--include/linux/slab.h2
-rw-r--r--include/linux/syscalls.h2
-rw-r--r--include/linux/tracehook.h3
-rw-r--r--include/linux/types.h16
-rw-r--r--include/linux/uaccess.h40
-rw-r--r--include/linux/vm_event_item.h4
-rw-r--r--include/linux/vmstat.h25
-rw-r--r--include/trace/events/compaction.h72
-rw-r--r--include/uapi/asm-generic/mman-common.h5
-rw-r--r--include/uapi/asm-generic/mman.h1
-rw-r--r--include/uapi/asm-generic/unistd.h4
26 files changed, 226 insertions, 194 deletions
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index aa8f61cf3a19..4cd4ddf64cc7 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -15,7 +15,8 @@
/* For more detailed tracepoint output */
#define COMPACT_NO_SUITABLE_PAGE 5
#define COMPACT_NOT_SUITABLE_ZONE 6
-/* When adding new state, please change compaction_status_string, too */
+#define COMPACT_CONTENDED 7
+/* When adding new states, please adjust include/trace/events/compaction.h */
/* Used to signal whether compaction detected need_sched() or lock contention */
/* No contention detected */
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 8efb40e61d6e..0e3110a0b771 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -210,6 +210,23 @@
#define __visible __attribute__((externally_visible))
#endif
+
+#if GCC_VERSION >= 40900 && !defined(__CHECKER__)
+/*
+ * __assume_aligned(n, k): Tell the optimizer that the returned
+ * pointer can be assumed to be k modulo n. The second argument is
+ * optional (default 0), so we use a variadic macro to make the
+ * shorthand.
+ *
+ * Beware: Do not apply this to functions which may return
+ * ERR_PTRs. Also, it is probably unwise to apply it to functions
+ * returning extra information in the low bits (but in that case the
+ * compiler should see some alignment anyway, when the return value is
+ * massaged by 'flags = ptr & 3; ptr &= ~3;').
+ */
+#define __assume_aligned(a, ...) __attribute__((__assume_aligned__(a, ## __VA_ARGS__)))
+#endif
+
/*
* GCC 'asm goto' miscompiles certain code sequences:
*
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 52a459ff75f4..4dac1036594f 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -417,6 +417,14 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
#define __visible
#endif
+/*
+ * Assume alignment of return value.
+ */
+#ifndef __assume_aligned
+#define __assume_aligned(a, ...)
+#endif
+
+
/* Are two types/vars the same type (ignoring qualifiers)? */
#ifndef __same_type
# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 1b357997cac5..5a1311942358 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -93,7 +93,7 @@ extern int current_cpuset_is_being_rebound(void);
extern void rebuild_sched_domains(void);
-extern void cpuset_print_task_mems_allowed(struct task_struct *p);
+extern void cpuset_print_current_mems_allowed(void);
/*
* read_mems_allowed_begin is required when making decisions involving
@@ -219,7 +219,7 @@ static inline void rebuild_sched_domains(void)
partition_sched_domains(1, NULL, NULL);
}
-static inline void cpuset_print_task_mems_allowed(struct task_struct *p)
+static inline void cpuset_print_current_mems_allowed(void)
{
}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 49749688156d..9a1cb8c605e0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2409,6 +2409,7 @@ extern int write_inode_now(struct inode *, int);
extern int filemap_fdatawrite(struct address_space *);
extern int filemap_flush(struct address_space *);
extern int filemap_fdatawait(struct address_space *);
+extern void filemap_fdatawait_keep_errors(struct address_space *);
extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
loff_t lend);
extern int filemap_write_and_wait(struct address_space *mapping);
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 5e35379f58a5..685c262e0be8 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -483,6 +483,17 @@ static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
#define hugepages_supported() (HPAGE_SHIFT != 0)
#endif
+void hugetlb_report_usage(struct seq_file *m, struct mm_struct *mm);
+
+static inline void hugetlb_count_add(long l, struct mm_struct *mm)
+{
+ atomic_long_add(l, &mm->hugetlb_usage);
+}
+
+static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
+{
+ atomic_long_sub(l, &mm->hugetlb_usage);
+}
#else /* CONFIG_HUGETLB_PAGE */
struct hstate {};
#define alloc_huge_page(v, a, r) NULL
@@ -519,6 +530,14 @@ static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
{
return &mm->page_table_lock;
}
+
+static inline void hugetlb_report_usage(struct seq_file *f, struct mm_struct *m)
+{
+}
+
+static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
+{
+}
#endif /* CONFIG_HUGETLB_PAGE */
static inline spinlock_t *huge_pte_lock(struct hstate *h,
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index c518eb589260..24daf8fc4d7c 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -89,10 +89,6 @@ int memblock_add_range(struct memblock_type *type,
phys_addr_t base, phys_addr_t size,
int nid, unsigned long flags);
-int memblock_remove_range(struct memblock_type *type,
- phys_addr_t base,
- phys_addr_t size);
-
void __next_mem_range(u64 *idx, int nid, ulong flags,
struct memblock_type *type_a,
struct memblock_type *type_b, phys_addr_t *out_start,
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 27251ed428f7..cd0e2413c358 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -301,8 +301,7 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg);
void mem_cgroup_uncharge(struct page *page);
void mem_cgroup_uncharge_list(struct list_head *page_list);
-void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
- bool lrucare);
+void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage);
struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *);
struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *);
@@ -384,7 +383,7 @@ unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
return mz->lru_size[lru];
}
-static inline int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec)
+static inline bool mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec)
{
unsigned long inactive_ratio;
unsigned long inactive;
@@ -403,24 +402,26 @@ static inline int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec)
return inactive * inactive_ratio < active;
}
+void mem_cgroup_handle_over_high(void);
+
void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
struct task_struct *p);
static inline void mem_cgroup_oom_enable(void)
{
- WARN_ON(current->memcg_oom.may_oom);
- current->memcg_oom.may_oom = 1;
+ WARN_ON(current->memcg_may_oom);
+ current->memcg_may_oom = 1;
}
static inline void mem_cgroup_oom_disable(void)
{
- WARN_ON(!current->memcg_oom.may_oom);
- current->memcg_oom.may_oom = 0;
+ WARN_ON(!current->memcg_may_oom);
+ current->memcg_may_oom = 0;
}
static inline bool task_in_memcg_oom(struct task_struct *p)
{
- return p->memcg_oom.memcg;
+ return p->memcg_in_oom;
}
bool mem_cgroup_oom_synchronize(bool wait);
@@ -537,9 +538,7 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
{
}
-static inline void mem_cgroup_migrate(struct page *oldpage,
- struct page *newpage,
- bool lrucare)
+static inline void mem_cgroup_replace_page(struct page *old, struct page *new)
{
}
@@ -585,10 +584,10 @@ static inline bool mem_cgroup_disabled(void)
return true;
}
-static inline int
+static inline bool
mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec)
{
- return 1;
+ return true;
}
static inline bool mem_cgroup_lruvec_online(struct lruvec *lruvec)
@@ -622,6 +621,10 @@ static inline void mem_cgroup_end_page_stat(struct mem_cgroup *memcg)
{
}
+static inline void mem_cgroup_handle_over_high(void)
+{
+}
+
static inline void mem_cgroup_oom_enable(void)
{
}
@@ -748,11 +751,10 @@ static inline bool memcg_kmem_is_active(struct mem_cgroup *memcg)
* conditions, but because they are pretty simple, they are expected to be
* fast.
*/
-bool __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg,
- int order);
-void __memcg_kmem_commit_charge(struct page *page,
- struct mem_cgroup *memcg, int order);
-void __memcg_kmem_uncharge_pages(struct page *page, int order);
+int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
+ struct mem_cgroup *memcg);
+int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order);
+void __memcg_kmem_uncharge(struct page *page, int order);
/*
* helper for acessing a memcg's index. It will be used as an index in the
@@ -767,77 +769,42 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg)
struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep);
void __memcg_kmem_put_cache(struct kmem_cache *cachep);
-struct mem_cgroup *__mem_cgroup_from_kmem(void *ptr);
-
-int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp,
- unsigned long nr_pages);
-void memcg_uncharge_kmem(struct mem_cgroup *memcg, unsigned long nr_pages);
-
-/**
- * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
- * @gfp: the gfp allocation flags.
- * @memcg: a pointer to the memcg this was charged against.
- * @order: allocation order.
- *
- * returns true if the memcg where the current task belongs can hold this
- * allocation.
- *
- * We return true automatically if this allocation is not to be accounted to
- * any memcg.
- */
-static inline bool
-memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
+static inline bool __memcg_kmem_bypass(gfp_t gfp)
{
if (!memcg_kmem_enabled())
return true;
-
if (gfp & __GFP_NOACCOUNT)
return true;
- /*
- * __GFP_NOFAIL allocations will move on even if charging is not
- * possible. Therefore we don't even try, and have this allocation
- * unaccounted. We could in theory charge it forcibly, but we hope
- * those allocations are rare, and won't be worth the trouble.
- */
- if (gfp & __GFP_NOFAIL)
- return true;
if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
return true;
-
- /* If the test is dying, just let it go. */
- if (unlikely(fatal_signal_pending(current)))
- return true;
-
- return __memcg_kmem_newpage_charge(gfp, memcg, order);
+ return false;
}
/**
- * memcg_kmem_uncharge_pages: uncharge pages from memcg
- * @page: pointer to struct page being freed
- * @order: allocation order.
+ * memcg_kmem_charge: charge a kmem page
+ * @page: page to charge
+ * @gfp: reclaim mode
+ * @order: allocation order
+ *
+ * Returns 0 on success, an error code on failure.
*/
-static inline void
-memcg_kmem_uncharge_pages(struct page *page, int order)
+static __always_inline int memcg_kmem_charge(struct page *page,
+ gfp_t gfp, int order)
{
- if (memcg_kmem_enabled())
- __memcg_kmem_uncharge_pages(page, order);
+ if (__memcg_kmem_bypass(gfp))
+ return 0;
+ return __memcg_kmem_charge(page, gfp, order);
}
/**
- * memcg_kmem_commit_charge: embeds correct memcg in a page
- * @page: pointer to struct page recently allocated
- * @memcg: the memcg structure we charged against
- * @order: allocation order.
- *
- * Needs to be called after memcg_kmem_newpage_charge, regardless of success or
- * failure of the allocation. if @page is NULL, this function will revert the
- * charges. Otherwise, it will commit @page to @memcg.
+ * memcg_kmem_uncharge: uncharge a kmem page
+ * @page: page to uncharge
+ * @order: allocation order
*/
-static inline void
-memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
+static __always_inline void memcg_kmem_uncharge(struct page *page, int order)
{
- if (memcg_kmem_enabled() && memcg)
- __memcg_kmem_commit_charge(page, memcg, order);
+ if (memcg_kmem_enabled())
+ __memcg_kmem_uncharge(page, order);
}
/**
@@ -850,17 +817,8 @@ memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
static __always_inline struct kmem_cache *
memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
{
- if (!memcg_kmem_enabled())
- return cachep;
- if (gfp & __GFP_NOACCOUNT)
- return cachep;
- if (gfp & __GFP_NOFAIL)
- return cachep;
- if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
+ if (__memcg_kmem_bypass(gfp))
return cachep;
- if (unlikely(fatal_signal_pending(current)))
- return cachep;
-
return __memcg_kmem_get_cache(cachep);
}
@@ -869,13 +827,6 @@ static __always_inline void memcg_kmem_put_cache(struct kmem_cache *cachep)
if (memcg_kmem_enabled())
__memcg_kmem_put_cache(cachep);
}
-
-static __always_inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr)
-{
- if (!memcg_kmem_enabled())
- return NULL;
- return __mem_cgroup_from_kmem(ptr);
-}
#else
#define for_each_memcg_cache_index(_idx) \
for (; NULL; )
@@ -890,18 +841,12 @@ static inline bool memcg_kmem_is_active(struct mem_cgroup *memcg)
return false;
}
-static inline bool
-memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
-{
- return true;
-}
-
-static inline void memcg_kmem_uncharge_pages(struct page *page, int order)
+static inline int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
{
+ return 0;
}
-static inline void
-memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
+static inline void memcg_kmem_uncharge(struct page *page, int order)
{
}
@@ -927,11 +872,5 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
static inline void memcg_kmem_put_cache(struct kmem_cache *cachep)
{
}
-
-static inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr)
-{
- return NULL;
-}
#endif /* CONFIG_MEMCG_KMEM */
#endif /* _LINUX_MEMCONTROL_H */
-
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 80001de019ba..906c46a05707 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -139,6 +139,7 @@ extern unsigned int kobjsize(const void *objp);
#define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */
#define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */
+#define VM_LOCKONFAULT 0x00080000 /* Lock the pages covered when they are faulted in */
#define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */
#define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */
#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */
@@ -202,6 +203,9 @@ extern unsigned int kobjsize(const void *objp);
/* This mask defines which mm->def_flags a process can inherit its parent */
#define VM_INIT_DEF_MASK VM_NOHUGEPAGE
+/* This mask is used to clear all the VMA flags used by mlock */
+#define VM_LOCKED_CLEAR_MASK (~(VM_LOCKED | VM_LOCKONFAULT))
+
/*
* mapping from the currently active vm_flags protection bits (the
* low four bits) to a page protection mask..
@@ -1606,8 +1610,10 @@ static inline void pgtable_init(void)
static inline bool pgtable_page_ctor(struct page *page)
{
+ if (!ptlock_init(page))
+ return false;
inc_zone_page_state(page, NR_PAGETABLE);
- return ptlock_init(page);
+ return true;
}
static inline void pgtable_page_dtor(struct page *page)
@@ -2036,8 +2042,6 @@ void page_cache_async_readahead(struct address_space *mapping,
pgoff_t offset,
unsigned long size);
-unsigned long max_sane_readahead(unsigned long nr);
-
/* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
@@ -2137,6 +2141,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma,
#define FOLL_NUMA 0x200 /* force NUMA hinting page fault */
#define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */
#define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */
+#define FOLL_MLOCK 0x1000 /* lock present pages */
typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
void *data);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 3d6baa7d4534..0a85da25a822 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -486,6 +486,9 @@ struct mm_struct {
/* address of the bounds directory */
void __user *bd_addr;
#endif
+#ifdef CONFIG_HUGETLB_PAGE
+ atomic_long_t hugetlb_usage;
+#endif
};
static inline void mm_init_cpumask(struct mm_struct *mm)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index d94347737292..2d7e660cdefe 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -823,8 +823,7 @@ enum memmap_context {
MEMMAP_HOTPLUG,
};
extern int init_currently_empty_zone(struct zone *zone, unsigned long start_pfn,
- unsigned long size,
- enum memmap_context context);
+ unsigned long size);
extern void lruvec_init(struct lruvec *lruvec);
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 78488e099ce7..7ec5b86735f3 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -73,6 +73,7 @@ extern int watchdog_user_enabled;
extern int watchdog_thresh;
extern unsigned long *watchdog_cpumask_bits;
extern int sysctl_softlockup_all_cpu_backtrace;
+extern int sysctl_hardlockup_all_cpu_backtrace;
struct ctl_table;
extern int proc_watchdog(struct ctl_table *, int ,
void __user *, size_t *, loff_t *);
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 416509e26d6d..a525e5067484 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -256,7 +256,7 @@ PAGEFLAG(Readahead, reclaim) TESTCLEARFLAG(Readahead, reclaim)
* Must use a macro here due to header dependency issues. page_zone() is not
* available at this point.
*/
-#define PageHighMem(__p) is_highmem(page_zone(__p))
+#define PageHighMem(__p) is_highmem_idx(page_zonenum(__p))
#else
PAGEFLAG_FALSE(HighMem)
#endif
diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h
index 17fa4f8de3a6..7e62920a3a94 100644
--- a/include/linux/page_counter.h
+++ b/include/linux/page_counter.h
@@ -36,9 +36,9 @@ static inline unsigned long page_counter_read(struct page_counter *counter)
void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages);
void page_counter_charge(struct page_counter *counter, unsigned long nr_pages);
-int page_counter_try_charge(struct page_counter *counter,
- unsigned long nr_pages,
- struct page_counter **fail);
+bool page_counter_try_charge(struct page_counter *counter,
+ unsigned long nr_pages,
+ struct page_counter **fail);
void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages);
int page_counter_limit(struct page_counter *counter, unsigned long limit);
int page_counter_memparse(const char *buf, const char *max,
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4effb1025fbb..eeb5066a44fb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -384,6 +384,7 @@ extern int proc_dowatchdog_thresh(struct ctl_table *table, int write,
void __user *buffer,
size_t *lenp, loff_t *ppos);
extern unsigned int softlockup_panic;
+extern unsigned int hardlockup_panic;
void lockup_detector_init(void);
#else
static inline void touch_softlockup_watchdog(void)
@@ -1460,7 +1461,9 @@ struct task_struct {
unsigned sched_reset_on_fork:1;
unsigned sched_contributes_to_load:1;
unsigned sched_migrated:1;
-
+#ifdef CONFIG_MEMCG
+ unsigned memcg_may_oom:1;
+#endif
#ifdef CONFIG_MEMCG_KMEM
unsigned memcg_kmem_skip_account:1;
#endif
@@ -1791,12 +1794,12 @@ struct task_struct {
unsigned long trace_recursion;
#endif /* CONFIG_TRACING */
#ifdef CONFIG_MEMCG
- struct memcg_oom_info {
- struct mem_cgroup *memcg;
- gfp_t gfp_mask;
- int order;
- unsigned int may_oom:1;
- } memcg_oom;
+ struct mem_cgroup *memcg_in_oom;
+ gfp_t memcg_oom_gfp_mask;
+ int memcg_oom_order;
+
+ /* number of pages to reclaim on returning to userland */
+ unsigned int memcg_nr_pages_over_high;
#endif
#ifdef CONFIG_UPROBES
struct uprobe_task *utask;
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 7e37d448ed91..7c82e3b307a3 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -111,7 +111,7 @@ struct mem_cgroup;
* struct kmem_cache related prototypes
*/
void __init kmem_cache_init(void);
-int slab_is_available(void);
+bool slab_is_available(void);
struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
unsigned long,
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a460e2ef2843..a156b82dd14c 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -887,4 +887,6 @@ asmlinkage long sys_execveat(int dfd, const char __user *filename,
asmlinkage long sys_membarrier(int cmd, int flags);
+asmlinkage long sys_mlock2(unsigned long start, size_t len, int flags);
+
#endif
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 84d497297c5f..26c152122a42 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -50,6 +50,7 @@
#include <linux/ptrace.h>
#include <linux/security.h>
#include <linux/task_work.h>
+#include <linux/memcontrol.h>
struct linux_binprm;
/*
@@ -188,6 +189,8 @@ static inline void tracehook_notify_resume(struct pt_regs *regs)
smp_mb__after_atomic();
if (unlikely(current->task_works))
task_work_run();
+
+ mem_cgroup_handle_over_high();
}
#endif /* <linux/tracehook.h> */
diff --git a/include/linux/types.h b/include/linux/types.h
index c314989d9158..70d8500bddf1 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -205,11 +205,25 @@ struct ustat {
* struct callback_head - callback structure for use with RCU and task_work
* @next: next update requests in a list
* @func: actual update function to call after the grace period.
+ *
+ * The struct is aligned to size of pointer. On most architectures it happens
+ * naturally due ABI requirements, but some architectures (like CRIS) have
+ * weird ABI and we need to ask it explicitly.
+ *
+ * The alignment is required to guarantee that bits 0 and 1 of @next will be
+ * clear under normal conditions -- as long as we use call_rcu(),
+ * call_rcu_bh(), call_rcu_sched(), or call_srcu() to queue callback.
+ *
+ * This guarantee is important for few reasons:
+ * - future call_rcu_lazy() will make use of lower bits in the pointer;
+ * - the structure shares storage spacer in struct page with @compound_head,
+ * which encode PageTail() in bit 0. The guarantee is needed to avoid
+ * false-positive PageTail().
*/
struct callback_head {
struct callback_head *next;
void (*func)(struct callback_head *head);
-};
+} __attribute__((aligned(sizeof(void *))));
#define rcu_head callback_head
typedef void (*rcu_callback_t)(struct rcu_head *head);
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index d6f2c2c5b043..558129af828a 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -75,36 +75,6 @@ static inline unsigned long __copy_from_user_nocache(void *to,
#endif /* ARCH_HAS_NOCACHE_UACCESS */
-/**
- * probe_kernel_address(): safely attempt to read from a location
- * @addr: address to read from - its type is type typeof(retval)*
- * @retval: read into this variable
- *
- * Safely read from address @addr into variable @revtal. If a kernel fault
- * happens, handle that and return -EFAULT.
- * We ensure that the __get_user() is executed in atomic context so that
- * do_page_fault() doesn't attempt to take mmap_sem. This makes
- * probe_kernel_address() suitable for use within regions where the caller
- * already holds mmap_sem, or other locks which nest inside mmap_sem.
- * This must be a macro because __get_user() needs to know the types of the
- * args.
- *
- * We don't include enough header files to be able to do the set_fs(). We
- * require that the probe_kernel_address() caller will do that.
- */
-#define probe_kernel_address(addr, retval) \
- ({ \
- long ret; \
- mm_segment_t old_fs = get_fs(); \
- \
- set_fs(KERNEL_DS); \
- pagefault_disable(); \
- ret = __copy_from_user_inatomic(&(retval), (__force typeof(retval) __user *)(addr), sizeof(retval)); \
- pagefault_enable(); \
- set_fs(old_fs); \
- ret; \
- })
-
/*
* probe_kernel_read(): safely attempt to read from a location
* @dst: pointer to the buffer that shall take the data
@@ -131,4 +101,14 @@ extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size
extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
+/**
+ * probe_kernel_address(): safely attempt to read from a location
+ * @addr: address to read from
+ * @retval: read into this variable
+ *
+ * Returns 0 on success, or -EFAULT.
+ */
+#define probe_kernel_address(addr, retval) \
+ probe_kernel_read(&retval, addr, sizeof(retval))
+
#endif /* __LINUX_UACCESS_H__ */
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 9246d32dc973..e623d392db0c 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -14,12 +14,12 @@
#endif
#ifdef CONFIG_HIGHMEM
-#define HIGHMEM_ZONE(xx) , xx##_HIGH
+#define HIGHMEM_ZONE(xx) xx##_HIGH,
#else
#define HIGHMEM_ZONE(xx)
#endif
-#define FOR_ALL_ZONES(xx) DMA_ZONE(xx) DMA32_ZONE(xx) xx##_NORMAL HIGHMEM_ZONE(xx) , xx##_MOVABLE
+#define FOR_ALL_ZONES(xx) DMA_ZONE(xx) DMA32_ZONE(xx) xx##_NORMAL, HIGHMEM_ZONE(xx) xx##_MOVABLE
enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
FOR_ALL_ZONES(PGALLOC),
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 82e7db7f7100..5dbc8b0ee567 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -161,30 +161,8 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone,
}
#ifdef CONFIG_NUMA
-/*
- * Determine the per node value of a stat item. This function
- * is called frequently in a NUMA machine, so try to be as
- * frugal as possible.
- */
-static inline unsigned long node_page_state(int node,
- enum zone_stat_item item)
-{
- struct zone *zones = NODE_DATA(node)->node_zones;
-
- return
-#ifdef CONFIG_ZONE_DMA
- zone_page_state(&zones[ZONE_DMA], item) +
-#endif
-#ifdef CONFIG_ZONE_DMA32
- zone_page_state(&zones[ZONE_DMA32], item) +
-#endif
-#ifdef CONFIG_HIGHMEM
- zone_page_state(&zones[ZONE_HIGHMEM], item) +
-#endif
- zone_page_state(&zones[ZONE_NORMAL], item) +
- zone_page_state(&zones[ZONE_MOVABLE], item);
-}
+extern unsigned long node_page_state(int node, enum zone_stat_item item);
extern void zone_statistics(struct zone *, struct zone *, gfp_t gfp);
#else
@@ -269,7 +247,6 @@ static inline void __dec_zone_page_state(struct page *page,
#define set_pgdat_percpu_threshold(pgdat, callback) { }
-static inline void refresh_cpu_vm_stats(int cpu) { }
static inline void refresh_zone_stat_thresholds(void) { }
static inline void cpu_vm_stats_fold(int cpu) { }
diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h
index 9a6a3fe0fb51..c92d1e1cbad9 100644
--- a/include/trace/events/compaction.h
+++ b/include/trace/events/compaction.h
@@ -9,6 +9,62 @@
#include <linux/tracepoint.h>
#include <trace/events/gfpflags.h>
+#define COMPACTION_STATUS \
+ EM( COMPACT_DEFERRED, "deferred") \
+ EM( COMPACT_SKIPPED, "skipped") \
+ EM( COMPACT_CONTINUE, "continue") \
+ EM( COMPACT_PARTIAL, "partial") \
+ EM( COMPACT_COMPLETE, "complete") \
+ EM( COMPACT_NO_SUITABLE_PAGE, "no_suitable_page") \
+ EM( COMPACT_NOT_SUITABLE_ZONE, "not_suitable_zone") \
+ EMe(COMPACT_CONTENDED, "contended")
+
+#ifdef CONFIG_ZONE_DMA
+#define IFDEF_ZONE_DMA(X) X
+#else
+#define IFDEF_ZONE_DMA(X)
+#endif
+
+#ifdef CONFIG_ZONE_DMA32
+#define IFDEF_ZONE_DMA32(X) X
+#else
+#define IFDEF_ZONE_DMA32(X)
+#endif
+
+#ifdef CONFIG_HIGHMEM
+#define IFDEF_ZONE_HIGHMEM(X) X
+#else
+#define IFDEF_ZONE_HIGHMEM(X)
+#endif
+
+#define ZONE_TYPE \
+ IFDEF_ZONE_DMA( EM (ZONE_DMA, "DMA")) \
+ IFDEF_ZONE_DMA32( EM (ZONE_DMA32, "DMA32")) \
+ EM (ZONE_NORMAL, "Normal") \
+ IFDEF_ZONE_HIGHMEM( EM (ZONE_HIGHMEM,"HighMem")) \
+ EMe(ZONE_MOVABLE,"Movable")
+
+/*
+ * First define the enums in the above macros to be exported to userspace
+ * via TRACE_DEFINE_ENUM().
+ */
+#undef EM
+#undef EMe
+#define EM(a, b) TRACE_DEFINE_ENUM(a);
+#define EMe(a, b) TRACE_DEFINE_ENUM(a);
+
+COMPACTION_STATUS
+ZONE_TYPE
+
+/*
+ * Now redefine the EM() and EMe() macros to map the enums to the strings
+ * that will be printed in the output.
+ */
+#undef EM
+#undef EMe
+#define EM(a, b) {a, b},
+#define EMe(a, b) {a, b}
+
DECLARE_EVENT_CLASS(mm_compaction_isolate_template,
TP_PROTO(
@@ -161,7 +217,7 @@ TRACE_EVENT(mm_compaction_end,
__entry->free_pfn,
__entry->zone_end,
__entry->sync ? "sync" : "async",
- compaction_status_string[__entry->status])
+ __print_symbolic(__entry->status, COMPACTION_STATUS))
);
TRACE_EVENT(mm_compaction_try_to_compact_pages,
@@ -201,23 +257,23 @@ DECLARE_EVENT_CLASS(mm_compaction_suitable_template,
TP_STRUCT__entry(
__field(int, nid)
- __field(char *, name)
+ __field(enum zone_type, idx)
__field(int, order)
__field(int, ret)
),
TP_fast_assign(
__entry->nid = zone_to_nid(zone);
- __entry->name = (char *)zone->name;
+ __entry->idx = zone_idx(zone);
__entry->order = order;
__entry->ret = ret;
),
TP_printk("node=%d zone=%-8s order=%d ret=%s",
__entry->nid,
- __entry->name,
+ __print_symbolic(__entry->idx, ZONE_TYPE),
__entry->order,
- compaction_status_string[__entry->ret])
+ __print_symbolic(__entry->ret, COMPACTION_STATUS))
);
DEFINE_EVENT(mm_compaction_suitable_template, mm_compaction_finished,
@@ -247,7 +303,7 @@ DECLARE_EVENT_CLASS(mm_compaction_defer_template,
TP_STRUCT__entry(
__field(int, nid)
- __field(char *, name)
+ __field(enum zone_type, idx)
__field(int, order)
__field(unsigned int, considered)
__field(unsigned int, defer_shift)
@@ -256,7 +312,7 @@ DECLARE_EVENT_CLASS(mm_compaction_defer_template,
TP_fast_assign(
__entry->nid = zone_to_nid(zone);
- __entry->name = (char *)zone->name;
+ __entry->idx = zone_idx(zone);
__entry->order = order;
__entry->considered = zone->compact_considered;
__entry->defer_shift = zone->compact_defer_shift;
@@ -265,7 +321,7 @@ DECLARE_EVENT_CLASS(mm_compaction_defer_template,
TP_printk("node=%d zone=%-8s order=%d order_failed=%d consider=%u limit=%lu",
__entry->nid,
- __entry->name,
+ __print_symbolic(__entry->idx, ZONE_TYPE),
__entry->order,
__entry->order_failed,
__entry->considered,
diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h
index ddc3b36f1046..a74dd84bbb6d 100644
--- a/include/uapi/asm-generic/mman-common.h
+++ b/include/uapi/asm-generic/mman-common.h
@@ -25,6 +25,11 @@
# define MAP_UNINITIALIZED 0x0 /* Don't support this flag */
#endif
+/*
+ * Flags for mlock
+ */
+#define MLOCK_ONFAULT 0x01 /* Lock pages in range after they are faulted in, do not prefault */
+
#define MS_ASYNC 1 /* sync memory asynchronously */
#define MS_INVALIDATE 2 /* invalidate the caches */
#define MS_SYNC 4 /* synchronous memory sync */
diff --git a/include/uapi/asm-generic/mman.h b/include/uapi/asm-generic/mman.h
index e9fe6fd2a074..7162cd4cca73 100644
--- a/include/uapi/asm-generic/mman.h
+++ b/include/uapi/asm-generic/mman.h
@@ -17,5 +17,6 @@
#define MCL_CURRENT 1 /* lock all current mappings */
#define MCL_FUTURE 2 /* lock all future mappings */
+#define MCL_ONFAULT 4 /* lock all pages that are faulted in */
#endif /* __ASM_GENERIC_MMAN_H */
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index ee124009e12a..1324b0292ec2 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -713,9 +713,11 @@ __SC_COMP(__NR_execveat, sys_execveat, compat_sys_execveat)
__SYSCALL(__NR_userfaultfd, sys_userfaultfd)
#define __NR_membarrier 283
__SYSCALL(__NR_membarrier, sys_membarrier)
+#define __NR_mlock2 284
+__SYSCALL(__NR_mlock2, sys_mlock2)
#undef __NR_syscalls
-#define __NR_syscalls 284
+#define __NR_syscalls 285
/*
* All syscalls below here should go away really,