summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-07-29 01:36:48 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2016-07-29 01:36:48 +0200
commit1c88e19b0f6a8471ee50d5062721ba30b8fd4ba9 (patch)
tree6d227487ca2cf391589c73af1c40ec7b7126feec /fs
parentMerge tag 'dmaengine-4.8-rc1' of git://git.infradead.org/users/vkoul/slave-dma (diff)
parentmm, compaction: simplify contended compaction handling (diff)
downloadlinux-1c88e19b0f6a8471ee50d5062721ba30b8fd4ba9.tar.xz
linux-1c88e19b0f6a8471ee50d5062721ba30b8fd4ba9.zip
Merge branch 'akpm' (patches from Andrew)
Merge more updates from Andrew Morton: "The rest of MM" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (101 commits) mm, compaction: simplify contended compaction handling mm, compaction: introduce direct compaction priority mm, thp: remove __GFP_NORETRY from khugepaged and madvised allocations mm, page_alloc: make THP-specific decisions more generic mm, page_alloc: restructure direct compaction handling in slowpath mm, page_alloc: don't retry initial attempt in slowpath mm, page_alloc: set alloc_flags only once in slowpath lib/stackdepot.c: use __GFP_NOWARN for stack allocations mm, kasan: switch SLUB to stackdepot, enable memory quarantine for SLUB mm, kasan: account for object redzone in SLUB's nearest_obj() mm: fix use-after-free if memory allocation failed in vma_adjust() zsmalloc: Delete an unnecessary check before the function call "iput" mm/memblock.c: fix index adjustment error in __next_mem_range_rev() mem-hotplug: alloc new page from a nearest neighbor node when mem-offline mm: optimize copy_page_to/from_iter_iovec mm: add cond_resched() to generic_swapfile_activate() Revert "mm, mempool: only set __GFP_NOMEMALLOC if there are free elements" mm, compaction: don't isolate PageWriteback pages in MIGRATE_SYNC_LIGHT mode mm: hwpoison: remove incorrect comments make __section_nr() more efficient ...
Diffstat (limited to 'fs')
-rw-r--r--fs/fs-writeback.c4
-rw-r--r--fs/fuse/file.c8
-rw-r--r--fs/nfs/internal.h2
-rw-r--r--fs/nfs/write.c2
-rw-r--r--fs/proc/base.c185
-rw-r--r--fs/proc/meminfo.c22
6 files changed, 115 insertions, 108 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 6f9c9f6f5157..56c8fda436c0 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1807,8 +1807,8 @@ static struct wb_writeback_work *get_next_work_item(struct bdi_writeback *wb)
*/
static unsigned long get_nr_dirty_pages(void)
{
- return global_page_state(NR_FILE_DIRTY) +
- global_page_state(NR_UNSTABLE_NFS) +
+ return global_node_page_state(NR_FILE_DIRTY) +
+ global_node_page_state(NR_UNSTABLE_NFS) +
get_nr_dirty_inodes();
}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 9154f8679024..2382f22a2a8b 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1452,7 +1452,7 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
list_del(&req->writepages_entry);
for (i = 0; i < req->num_pages; i++) {
dec_wb_stat(&bdi->wb, WB_WRITEBACK);
- dec_zone_page_state(req->pages[i], NR_WRITEBACK_TEMP);
+ dec_node_page_state(req->pages[i], NR_WRITEBACK_TEMP);
wb_writeout_inc(&bdi->wb);
}
wake_up(&fi->page_waitq);
@@ -1642,7 +1642,7 @@ static int fuse_writepage_locked(struct page *page)
req->inode = inode;
inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
- inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
+ inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
spin_lock(&fc->lock);
list_add(&req->writepages_entry, &fi->writepages);
@@ -1756,7 +1756,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req,
spin_unlock(&fc->lock);
dec_wb_stat(&bdi->wb, WB_WRITEBACK);
- dec_zone_page_state(page, NR_WRITEBACK_TEMP);
+ dec_node_page_state(page, NR_WRITEBACK_TEMP);
wb_writeout_inc(&bdi->wb);
fuse_writepage_free(fc, new_req);
fuse_request_free(new_req);
@@ -1855,7 +1855,7 @@ static int fuse_writepages_fill(struct page *page,
req->page_descs[req->num_pages].length = PAGE_SIZE;
inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
- inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
+ inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
err = 0;
if (is_writeback && fuse_writepage_in_flight(req, page)) {
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 5154fa65a2f2..5ea04d87fc65 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -623,7 +623,7 @@ void nfs_mark_page_unstable(struct page *page, struct nfs_commit_info *cinfo)
if (!cinfo->dreq) {
struct inode *inode = page_file_mapping(page)->host;
- inc_zone_page_state(page, NR_UNSTABLE_NFS);
+ inc_node_page_state(page, NR_UNSTABLE_NFS);
inc_wb_stat(&inode_to_bdi(inode)->wb, WB_RECLAIMABLE);
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e1c74d3db64d..593fa21a02c0 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -898,7 +898,7 @@ nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
static void
nfs_clear_page_commit(struct page *page)
{
- dec_zone_page_state(page, NR_UNSTABLE_NFS);
+ dec_node_page_state(page, NR_UNSTABLE_NFS);
dec_wb_stat(&inode_to_bdi(page_file_mapping(page)->host)->wb,
WB_RECLAIMABLE);
}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index a11eb7196ec8..31370da2ee7c 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1024,23 +1024,107 @@ static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
char buffer[PROC_NUMBUF];
int oom_adj = OOM_ADJUST_MIN;
size_t len;
- unsigned long flags;
if (!task)
return -ESRCH;
- if (lock_task_sighand(task, &flags)) {
- if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX)
- oom_adj = OOM_ADJUST_MAX;
- else
- oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
- OOM_SCORE_ADJ_MAX;
- unlock_task_sighand(task, &flags);
- }
+ if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX)
+ oom_adj = OOM_ADJUST_MAX;
+ else
+ oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
+ OOM_SCORE_ADJ_MAX;
put_task_struct(task);
len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj);
return simple_read_from_buffer(buf, count, ppos, buffer, len);
}
+static int __set_oom_adj(struct file *file, int oom_adj, bool legacy)
+{
+ static DEFINE_MUTEX(oom_adj_mutex);
+ struct mm_struct *mm = NULL;
+ struct task_struct *task;
+ int err = 0;
+
+ task = get_proc_task(file_inode(file));
+ if (!task)
+ return -ESRCH;
+
+ mutex_lock(&oom_adj_mutex);
+ if (legacy) {
+ if (oom_adj < task->signal->oom_score_adj &&
+ !capable(CAP_SYS_RESOURCE)) {
+ err = -EACCES;
+ goto err_unlock;
+ }
+ /*
+ * /proc/pid/oom_adj is provided for legacy purposes, ask users to use
+ * /proc/pid/oom_score_adj instead.
+ */
+ pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
+ current->comm, task_pid_nr(current), task_pid_nr(task),
+ task_pid_nr(task));
+ } else {
+ if ((short)oom_adj < task->signal->oom_score_adj_min &&
+ !capable(CAP_SYS_RESOURCE)) {
+ err = -EACCES;
+ goto err_unlock;
+ }
+ }
+
+ /*
+ * Make sure we will check other processes sharing the mm if this is
+ * not vfrok which wants its own oom_score_adj.
+ * pin the mm so it doesn't go away and get reused after task_unlock
+ */
+ if (!task->vfork_done) {
+ struct task_struct *p = find_lock_task_mm(task);
+
+ if (p) {
+ if (atomic_read(&p->mm->mm_users) > 1) {
+ mm = p->mm;
+ atomic_inc(&mm->mm_count);
+ }
+ task_unlock(p);
+ }
+ }
+
+ task->signal->oom_score_adj = oom_adj;
+ if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
+ task->signal->oom_score_adj_min = (short)oom_adj;
+ trace_oom_score_adj_update(task);
+
+ if (mm) {
+ struct task_struct *p;
+
+ rcu_read_lock();
+ for_each_process(p) {
+ if (same_thread_group(task, p))
+ continue;
+
+ /* do not touch kernel threads or the global init */
+ if (p->flags & PF_KTHREAD || is_global_init(p))
+ continue;
+
+ task_lock(p);
+ if (!p->vfork_done && process_shares_mm(p, mm)) {
+ pr_info("updating oom_score_adj for %d (%s) from %d to %d because it shares mm with %d (%s). Report if this is unexpected.\n",
+ task_pid_nr(p), p->comm,
+ p->signal->oom_score_adj, oom_adj,
+ task_pid_nr(task), task->comm);
+ p->signal->oom_score_adj = oom_adj;
+ if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
+ p->signal->oom_score_adj_min = (short)oom_adj;
+ }
+ task_unlock(p);
+ }
+ rcu_read_unlock();
+ mmdrop(mm);
+ }
+err_unlock:
+ mutex_unlock(&oom_adj_mutex);
+ put_task_struct(task);
+ return err;
+}
+
/*
* /proc/pid/oom_adj exists solely for backwards compatibility with previous
* kernels. The effective policy is defined by oom_score_adj, which has a
@@ -1054,10 +1138,8 @@ static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
static ssize_t oom_adj_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
- struct task_struct *task;
char buffer[PROC_NUMBUF];
int oom_adj;
- unsigned long flags;
int err;
memset(buffer, 0, sizeof(buffer));
@@ -1077,23 +1159,6 @@ static ssize_t oom_adj_write(struct file *file, const char __user *buf,
goto out;
}
- task = get_proc_task(file_inode(file));
- if (!task) {
- err = -ESRCH;
- goto out;
- }
-
- task_lock(task);
- if (!task->mm) {
- err = -EINVAL;
- goto err_task_lock;
- }
-
- if (!lock_task_sighand(task, &flags)) {
- err = -ESRCH;
- goto err_task_lock;
- }
-
/*
* Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
* value is always attainable.
@@ -1103,27 +1168,7 @@ static ssize_t oom_adj_write(struct file *file, const char __user *buf,
else
oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;
- if (oom_adj < task->signal->oom_score_adj &&
- !capable(CAP_SYS_RESOURCE)) {
- err = -EACCES;
- goto err_sighand;
- }
-
- /*
- * /proc/pid/oom_adj is provided for legacy purposes, ask users to use
- * /proc/pid/oom_score_adj instead.
- */
- pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
- current->comm, task_pid_nr(current), task_pid_nr(task),
- task_pid_nr(task));
-
- task->signal->oom_score_adj = oom_adj;
- trace_oom_score_adj_update(task);
-err_sighand:
- unlock_task_sighand(task, &flags);
-err_task_lock:
- task_unlock(task);
- put_task_struct(task);
+ err = __set_oom_adj(file, oom_adj, true);
out:
return err < 0 ? err : count;
}
@@ -1140,15 +1185,11 @@ static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
struct task_struct *task = get_proc_task(file_inode(file));
char buffer[PROC_NUMBUF];
short oom_score_adj = OOM_SCORE_ADJ_MIN;
- unsigned long flags;
size_t len;
if (!task)
return -ESRCH;
- if (lock_task_sighand(task, &flags)) {
- oom_score_adj = task->signal->oom_score_adj;
- unlock_task_sighand(task, &flags);
- }
+ oom_score_adj = task->signal->oom_score_adj;
put_task_struct(task);
len = snprintf(buffer, sizeof(buffer), "%hd\n", oom_score_adj);
return simple_read_from_buffer(buf, count, ppos, buffer, len);
@@ -1157,9 +1198,7 @@ static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
- struct task_struct *task;
char buffer[PROC_NUMBUF];
- unsigned long flags;
int oom_score_adj;
int err;
@@ -1180,39 +1219,7 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
goto out;
}
- task = get_proc_task(file_inode(file));
- if (!task) {
- err = -ESRCH;
- goto out;
- }
-
- task_lock(task);
- if (!task->mm) {
- err = -EINVAL;
- goto err_task_lock;
- }
-
- if (!lock_task_sighand(task, &flags)) {
- err = -ESRCH;
- goto err_task_lock;
- }
-
- if ((short)oom_score_adj < task->signal->oom_score_adj_min &&
- !capable(CAP_SYS_RESOURCE)) {
- err = -EACCES;
- goto err_sighand;
- }
-
- task->signal->oom_score_adj = (short)oom_score_adj;
- if (has_capability_noaudit(current, CAP_SYS_RESOURCE))
- task->signal->oom_score_adj_min = (short)oom_score_adj;
- trace_oom_score_adj_update(task);
-
-err_sighand:
- unlock_task_sighand(task, &flags);
-err_task_lock:
- task_unlock(task);
- put_task_struct(task);
+ err = __set_oom_adj(file, oom_score_adj, false);
out:
return err < 0 ? err : count;
}
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index cf301a9ef512..09e18fdf61e5 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -40,7 +40,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
si_swapinfo(&i);
committed = percpu_counter_read_positive(&vm_committed_as);
- cached = global_page_state(NR_FILE_PAGES) -
+ cached = global_node_page_state(NR_FILE_PAGES) -
total_swapcache_pages() - i.bufferram;
if (cached < 0)
cached = 0;
@@ -138,23 +138,23 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
#endif
K(i.totalswap),
K(i.freeswap),
- K(global_page_state(NR_FILE_DIRTY)),
- K(global_page_state(NR_WRITEBACK)),
- K(global_page_state(NR_ANON_PAGES)),
- K(global_page_state(NR_FILE_MAPPED)),
+ K(global_node_page_state(NR_FILE_DIRTY)),
+ K(global_node_page_state(NR_WRITEBACK)),
+ K(global_node_page_state(NR_ANON_MAPPED)),
+ K(global_node_page_state(NR_FILE_MAPPED)),
K(i.sharedram),
K(global_page_state(NR_SLAB_RECLAIMABLE) +
global_page_state(NR_SLAB_UNRECLAIMABLE)),
K(global_page_state(NR_SLAB_RECLAIMABLE)),
K(global_page_state(NR_SLAB_UNRECLAIMABLE)),
- global_page_state(NR_KERNEL_STACK) * THREAD_SIZE / 1024,
+ global_page_state(NR_KERNEL_STACK_KB),
K(global_page_state(NR_PAGETABLE)),
#ifdef CONFIG_QUICKLIST
K(quicklist_total_size()),
#endif
- K(global_page_state(NR_UNSTABLE_NFS)),
+ K(global_node_page_state(NR_UNSTABLE_NFS)),
K(global_page_state(NR_BOUNCE)),
- K(global_page_state(NR_WRITEBACK_TEMP)),
+ K(global_node_page_state(NR_WRITEBACK_TEMP)),
K(vm_commit_limit()),
K(committed),
(unsigned long)VMALLOC_TOTAL >> 10,
@@ -164,9 +164,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
, atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10)
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- , K(global_page_state(NR_ANON_THPS) * HPAGE_PMD_NR)
- , K(global_page_state(NR_SHMEM_THPS) * HPAGE_PMD_NR)
- , K(global_page_state(NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR)
+ , K(global_node_page_state(NR_ANON_THPS) * HPAGE_PMD_NR)
+ , K(global_node_page_state(NR_SHMEM_THPS) * HPAGE_PMD_NR)
+ , K(global_node_page_state(NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR)
#endif
#ifdef CONFIG_CMA
, K(totalcma_pages)