author		Linus Torvalds <torvalds@linux-foundation.org>	2016-07-29 01:36:48 +0200
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-07-29 01:36:48 +0200
commit		1c88e19b0f6a8471ee50d5062721ba30b8fd4ba9 (patch)
tree		6d227487ca2cf391589c73af1c40ec7b7126feec /fs
parent		Merge tag 'dmaengine-4.8-rc1' of git://git.infradead.org/users/vkoul/slave-dma (diff)
parent		mm, compaction: simplify contended compaction handling (diff)
Merge branch 'akpm' (patches from Andrew)
Merge more updates from Andrew Morton:
"The rest of MM"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (101 commits)
mm, compaction: simplify contended compaction handling
mm, compaction: introduce direct compaction priority
mm, thp: remove __GFP_NORETRY from khugepaged and madvised allocations
mm, page_alloc: make THP-specific decisions more generic
mm, page_alloc: restructure direct compaction handling in slowpath
mm, page_alloc: don't retry initial attempt in slowpath
mm, page_alloc: set alloc_flags only once in slowpath
lib/stackdepot.c: use __GFP_NOWARN for stack allocations
mm, kasan: switch SLUB to stackdepot, enable memory quarantine for SLUB
mm, kasan: account for object redzone in SLUB's nearest_obj()
mm: fix use-after-free if memory allocation failed in vma_adjust()
zsmalloc: Delete an unnecessary check before the function call "iput"
mm/memblock.c: fix index adjustment error in __next_mem_range_rev()
mem-hotplug: alloc new page from a nearest neighbor node when mem-offline
mm: optimize copy_page_to/from_iter_iovec
mm: add cond_resched() to generic_swapfile_activate()
Revert "mm, mempool: only set __GFP_NOMEMALLOC if there are free elements"
mm, compaction: don't isolate PageWriteback pages in MIGRATE_SYNC_LIGHT mode
mm: hwpoison: remove incorrect comments
make __section_nr() more efficient
...
Diffstat (limited to 'fs')
-rw-r--r--	fs/fs-writeback.c	4
-rw-r--r--	fs/fuse/file.c		8
-rw-r--r--	fs/nfs/internal.h	2
-rw-r--r--	fs/nfs/write.c		2
-rw-r--r--	fs/proc/base.c		185
-rw-r--r--	fs/proc/meminfo.c	22
6 files changed, 115 insertions(+), 108 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 6f9c9f6f5157..56c8fda436c0 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1807,8 +1807,8 @@ static struct wb_writeback_work *get_next_work_item(struct bdi_writeback *wb)
  */
 static unsigned long get_nr_dirty_pages(void)
 {
-	return global_page_state(NR_FILE_DIRTY) +
-		global_page_state(NR_UNSTABLE_NFS) +
+	return global_node_page_state(NR_FILE_DIRTY) +
+		global_node_page_state(NR_UNSTABLE_NFS) +
 		get_nr_dirty_inodes();
 }
 
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 9154f8679024..2382f22a2a8b 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1452,7 +1452,7 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
 	list_del(&req->writepages_entry);
 	for (i = 0; i < req->num_pages; i++) {
 		dec_wb_stat(&bdi->wb, WB_WRITEBACK);
-		dec_zone_page_state(req->pages[i], NR_WRITEBACK_TEMP);
+		dec_node_page_state(req->pages[i], NR_WRITEBACK_TEMP);
 		wb_writeout_inc(&bdi->wb);
 	}
 	wake_up(&fi->page_waitq);
@@ -1642,7 +1642,7 @@ static int fuse_writepage_locked(struct page *page)
 	req->inode = inode;
 
 	inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
-	inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
+	inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
 
 	spin_lock(&fc->lock);
 	list_add(&req->writepages_entry, &fi->writepages);
@@ -1756,7 +1756,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req,
 	spin_unlock(&fc->lock);
 
 	dec_wb_stat(&bdi->wb, WB_WRITEBACK);
-	dec_zone_page_state(page, NR_WRITEBACK_TEMP);
+	dec_node_page_state(page, NR_WRITEBACK_TEMP);
 	wb_writeout_inc(&bdi->wb);
 	fuse_writepage_free(fc, new_req);
 	fuse_request_free(new_req);
@@ -1855,7 +1855,7 @@ static int fuse_writepages_fill(struct page *page,
 	req->page_descs[req->num_pages].length = PAGE_SIZE;
 
 	inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
-	inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
+	inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
 
 	err = 0;
 	if (is_writeback && fuse_writepage_in_flight(req, page)) {
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 5154fa65a2f2..5ea04d87fc65 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -623,7 +623,7 @@ void nfs_mark_page_unstable(struct page *page, struct nfs_commit_info *cinfo)
 	if (!cinfo->dreq) {
 		struct inode *inode = page_file_mapping(page)->host;
 
-		inc_zone_page_state(page, NR_UNSTABLE_NFS);
+		inc_node_page_state(page, NR_UNSTABLE_NFS);
 		inc_wb_stat(&inode_to_bdi(inode)->wb, WB_RECLAIMABLE);
 		__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
 	}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e1c74d3db64d..593fa21a02c0 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -898,7 +898,7 @@ nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
 static void
 nfs_clear_page_commit(struct page *page)
 {
-	dec_zone_page_state(page, NR_UNSTABLE_NFS);
+	dec_node_page_state(page, NR_UNSTABLE_NFS);
 	dec_wb_stat(&inode_to_bdi(page_file_mapping(page)->host)->wb,
 		    WB_RECLAIMABLE);
 }
diff --git a/fs/proc/base.c b/fs/proc/base.c
index a11eb7196ec8..31370da2ee7c 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1024,23 +1024,107 @@ static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
 	char buffer[PROC_NUMBUF];
 	int oom_adj = OOM_ADJUST_MIN;
 	size_t len;
-	unsigned long flags;
 
 	if (!task)
 		return -ESRCH;
-	if (lock_task_sighand(task, &flags)) {
-		if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX)
-			oom_adj = OOM_ADJUST_MAX;
-		else
-			oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
-				  OOM_SCORE_ADJ_MAX;
-		unlock_task_sighand(task, &flags);
-	}
+	if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX)
+		oom_adj = OOM_ADJUST_MAX;
+	else
+		oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
+			  OOM_SCORE_ADJ_MAX;
 	put_task_struct(task);
 	len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj);
 	return simple_read_from_buffer(buf, count, ppos, buffer, len);
 }
 
+static int __set_oom_adj(struct file *file, int oom_adj, bool legacy)
+{
+	static DEFINE_MUTEX(oom_adj_mutex);
+	struct mm_struct *mm = NULL;
+	struct task_struct *task;
+	int err = 0;
+
+	task = get_proc_task(file_inode(file));
+	if (!task)
+		return -ESRCH;
+
+	mutex_lock(&oom_adj_mutex);
+	if (legacy) {
+		if (oom_adj < task->signal->oom_score_adj &&
+				!capable(CAP_SYS_RESOURCE)) {
+			err = -EACCES;
+			goto err_unlock;
+		}
+		/*
+		 * /proc/pid/oom_adj is provided for legacy purposes, ask users to use
+		 * /proc/pid/oom_score_adj instead.
+		 */
+		pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
+			     current->comm, task_pid_nr(current), task_pid_nr(task),
+			     task_pid_nr(task));
+	} else {
+		if ((short)oom_adj < task->signal->oom_score_adj_min &&
+				!capable(CAP_SYS_RESOURCE)) {
+			err = -EACCES;
+			goto err_unlock;
+		}
+	}
+
+	/*
+	 * Make sure we will check other processes sharing the mm if this is
+	 * not vfrok which wants its own oom_score_adj.
+	 * pin the mm so it doesn't go away and get reused after task_unlock
+	 */
+	if (!task->vfork_done) {
+		struct task_struct *p = find_lock_task_mm(task);
+
+		if (p) {
+			if (atomic_read(&p->mm->mm_users) > 1) {
+				mm = p->mm;
+				atomic_inc(&mm->mm_count);
+			}
+			task_unlock(p);
+		}
+	}
+
+	task->signal->oom_score_adj = oom_adj;
+	if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
+		task->signal->oom_score_adj_min = (short)oom_adj;
+	trace_oom_score_adj_update(task);
+
+	if (mm) {
+		struct task_struct *p;
+
+		rcu_read_lock();
+		for_each_process(p) {
+			if (same_thread_group(task, p))
+				continue;
+
+			/* do not touch kernel threads or the global init */
+			if (p->flags & PF_KTHREAD || is_global_init(p))
+				continue;
+
+			task_lock(p);
+			if (!p->vfork_done && process_shares_mm(p, mm)) {
+				pr_info("updating oom_score_adj for %d (%s) from %d to %d because it shares mm with %d (%s). Report if this is unexpected.\n",
+						task_pid_nr(p), p->comm,
+						p->signal->oom_score_adj, oom_adj,
+						task_pid_nr(task), task->comm);
+				p->signal->oom_score_adj = oom_adj;
+				if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
+					p->signal->oom_score_adj_min = (short)oom_adj;
+			}
+			task_unlock(p);
+		}
+		rcu_read_unlock();
+		mmdrop(mm);
+	}
+err_unlock:
+	mutex_unlock(&oom_adj_mutex);
+	put_task_struct(task);
+	return err;
+}
+
 /*
  * /proc/pid/oom_adj exists solely for backwards compatibility with previous
  * kernels. The effective policy is defined by oom_score_adj, which has a
@@ -1054,10 +1138,8 @@ static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
 static ssize_t oom_adj_write(struct file *file, const char __user *buf,
 			     size_t count, loff_t *ppos)
 {
-	struct task_struct *task;
 	char buffer[PROC_NUMBUF];
 	int oom_adj;
-	unsigned long flags;
 	int err;
 
 	memset(buffer, 0, sizeof(buffer));
@@ -1077,23 +1159,6 @@ static ssize_t oom_adj_write(struct file *file, const char __user *buf,
 		goto out;
 	}
 
-	task = get_proc_task(file_inode(file));
-	if (!task) {
-		err = -ESRCH;
-		goto out;
-	}
-
-	task_lock(task);
-	if (!task->mm) {
-		err = -EINVAL;
-		goto err_task_lock;
-	}
-
-	if (!lock_task_sighand(task, &flags)) {
-		err = -ESRCH;
-		goto err_task_lock;
-	}
-
 	/*
	 * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
	 * value is always attainable.
@@ -1103,27 +1168,7 @@ static ssize_t oom_adj_write(struct file *file, const char __user *buf,
 	else
 		oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;
 
-	if (oom_adj < task->signal->oom_score_adj &&
-	    !capable(CAP_SYS_RESOURCE)) {
-		err = -EACCES;
-		goto err_sighand;
-	}
-
-	/*
-	 * /proc/pid/oom_adj is provided for legacy purposes, ask users to use
-	 * /proc/pid/oom_score_adj instead.
-	 */
-	pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
-		     current->comm, task_pid_nr(current), task_pid_nr(task),
-		     task_pid_nr(task));
-
-	task->signal->oom_score_adj = oom_adj;
-	trace_oom_score_adj_update(task);
-err_sighand:
-	unlock_task_sighand(task, &flags);
-err_task_lock:
-	task_unlock(task);
-	put_task_struct(task);
+	err = __set_oom_adj(file, oom_adj, true);
 out:
 	return err < 0 ? err : count;
 }
@@ -1140,15 +1185,11 @@ static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
 	struct task_struct *task = get_proc_task(file_inode(file));
 	char buffer[PROC_NUMBUF];
 	short oom_score_adj = OOM_SCORE_ADJ_MIN;
-	unsigned long flags;
 	size_t len;
 
 	if (!task)
 		return -ESRCH;
-	if (lock_task_sighand(task, &flags)) {
-		oom_score_adj = task->signal->oom_score_adj;
-		unlock_task_sighand(task, &flags);
-	}
+	oom_score_adj = task->signal->oom_score_adj;
 	put_task_struct(task);
 	len = snprintf(buffer, sizeof(buffer), "%hd\n", oom_score_adj);
 	return simple_read_from_buffer(buf, count, ppos, buffer, len);
@@ -1157,9 +1198,7 @@ static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
 static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
 					size_t count, loff_t *ppos)
 {
-	struct task_struct *task;
 	char buffer[PROC_NUMBUF];
-	unsigned long flags;
 	int oom_score_adj;
 	int err;
 
@@ -1180,39 +1219,7 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
 		goto out;
 	}
 
-	task = get_proc_task(file_inode(file));
-	if (!task) {
-		err = -ESRCH;
-		goto out;
-	}
-
-	task_lock(task);
-	if (!task->mm) {
-		err = -EINVAL;
-		goto err_task_lock;
-	}
-
-	if (!lock_task_sighand(task, &flags)) {
-		err = -ESRCH;
-		goto err_task_lock;
-	}
-
-	if ((short)oom_score_adj < task->signal->oom_score_adj_min &&
-			!capable(CAP_SYS_RESOURCE)) {
-		err = -EACCES;
-		goto err_sighand;
-	}
-
-	task->signal->oom_score_adj = (short)oom_score_adj;
-	if (has_capability_noaudit(current, CAP_SYS_RESOURCE))
-		task->signal->oom_score_adj_min = (short)oom_score_adj;
-	trace_oom_score_adj_update(task);
-
-err_sighand:
-	unlock_task_sighand(task, &flags);
-err_task_lock:
-	task_unlock(task);
-	put_task_struct(task);
+	err = __set_oom_adj(file, oom_score_adj, false);
 out:
 	return err < 0 ? err : count;
 }
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index cf301a9ef512..09e18fdf61e5 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -40,7 +40,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 	si_swapinfo(&i);
 	committed = percpu_counter_read_positive(&vm_committed_as);
 
-	cached = global_page_state(NR_FILE_PAGES) -
+	cached = global_node_page_state(NR_FILE_PAGES) -
 			total_swapcache_pages() - i.bufferram;
 	if (cached < 0)
 		cached = 0;
@@ -138,23 +138,23 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 #endif
 		K(i.totalswap),
 		K(i.freeswap),
-		K(global_page_state(NR_FILE_DIRTY)),
-		K(global_page_state(NR_WRITEBACK)),
-		K(global_page_state(NR_ANON_PAGES)),
-		K(global_page_state(NR_FILE_MAPPED)),
+		K(global_node_page_state(NR_FILE_DIRTY)),
+		K(global_node_page_state(NR_WRITEBACK)),
+		K(global_node_page_state(NR_ANON_MAPPED)),
+		K(global_node_page_state(NR_FILE_MAPPED)),
 		K(i.sharedram),
 		K(global_page_state(NR_SLAB_RECLAIMABLE) +
 				global_page_state(NR_SLAB_UNRECLAIMABLE)),
 		K(global_page_state(NR_SLAB_RECLAIMABLE)),
 		K(global_page_state(NR_SLAB_UNRECLAIMABLE)),
-		global_page_state(NR_KERNEL_STACK) * THREAD_SIZE / 1024,
+		global_page_state(NR_KERNEL_STACK_KB),
 		K(global_page_state(NR_PAGETABLE)),
 #ifdef CONFIG_QUICKLIST
 		K(quicklist_total_size()),
 #endif
-		K(global_page_state(NR_UNSTABLE_NFS)),
+		K(global_node_page_state(NR_UNSTABLE_NFS)),
 		K(global_page_state(NR_BOUNCE)),
-		K(global_page_state(NR_WRITEBACK_TEMP)),
+		K(global_node_page_state(NR_WRITEBACK_TEMP)),
 		K(vm_commit_limit()),
 		K(committed),
 		(unsigned long)VMALLOC_TOTAL >> 10,
@@ -164,9 +164,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		, atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10)
 #endif
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-		, K(global_page_state(NR_ANON_THPS) * HPAGE_PMD_NR)
-		, K(global_page_state(NR_SHMEM_THPS) * HPAGE_PMD_NR)
-		, K(global_page_state(NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR)
+		, K(global_node_page_state(NR_ANON_THPS) * HPAGE_PMD_NR)
+		, K(global_node_page_state(NR_SHMEM_THPS) * HPAGE_PMD_NR)
+		, K(global_node_page_state(NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR)
 #endif
 #ifdef CONFIG_CMA
 		, K(totalcma_pages)
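
For reference, the scaling performed in oom_adj_write() above (and kept by the new __set_oom_adj() helper) maps the legacy oom_adj range [-17, 15] onto the oom_score_adj range [-1000, 1000]. The stand-alone sketch below mirrors that arithmetic in user space; it assumes the usual constants OOM_DISABLE = -17, OOM_ADJUST_MAX = 15 and OOM_SCORE_ADJ_MAX = 1000 from include/uapi/linux/oom.h, and is an illustration only, not kernel code.

#include <stdio.h>

/* Constants as defined in include/uapi/linux/oom.h (assumed here). */
#define OOM_DISABLE		(-17)
#define OOM_ADJUST_MAX		15
#define OOM_SCORE_ADJ_MAX	1000

/* Mirrors the scaling oom_adj_write() performs before calling __set_oom_adj(). */
static int oom_adj_to_score_adj(int oom_adj)
{
	if (oom_adj == OOM_ADJUST_MAX)
		return OOM_SCORE_ADJ_MAX;	/* 15 maps straight to the ceiling */
	return (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;	/* scale by 1000/17 */
}

int main(void)
{
	int samples[] = { -17, -16, 0, 8, 15 };
	unsigned int i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("oom_adj %3d -> oom_score_adj %5d\n",
		       samples[i], oom_adj_to_score_adj(samples[i]));
	return 0;
}

With truncating integer division this maps -17 to -1000, -16 to -941, and 8 to 470, while 15 is special-cased to the ceiling of 1000, which is why a maximum value stays attainable after the conversion.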