summaryrefslogtreecommitdiffstats
path: root/fs/proc
diff options
context:
space:
mode:
authorMike Marshall <hubcap@omnibond.com>2016-03-14 20:39:42 +0100
committerMike Marshall <hubcap@omnibond.com>2016-03-14 20:39:42 +0100
commitab6652524aaf834d5dcdb46dd7695813b8d63da5 (patch)
treebb3876a9b61254be902416f7dbf578deb28e9f22 /fs/proc
parentorangefs: make fs_mount_pending static (diff)
parentLinux 4.5 (diff)
downloadlinux-ab6652524aaf834d5dcdb46dd7695813b8d63da5.tar.xz
linux-ab6652524aaf834d5dcdb46dd7695813b8d63da5.zip
Orangefs: merge to v4.5
Merge tag 'v4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux into current Linux 4.5
Diffstat (limited to 'fs/proc')
-rw-r--r--fs/proc/array.c2
-rw-r--r--fs/proc/base.c75
-rw-r--r--fs/proc/fd.c1
-rw-r--r--fs/proc/inode.c24
-rw-r--r--fs/proc/kcore.c4
-rw-r--r--fs/proc/meminfo.c5
-rw-r--r--fs/proc/namespaces.c14
-rw-r--r--fs/proc/page.c4
-rw-r--r--fs/proc/self.c22
-rw-r--r--fs/proc/task_mmu.c210
-rw-r--r--fs/proc/task_nommu.c49
-rw-r--r--fs/proc/thread_self.c23
12 files changed, 252 insertions, 181 deletions
diff --git a/fs/proc/array.c b/fs/proc/array.c
index d73291f5f0fc..b6c00ce0e29e 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -395,7 +395,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
state = *get_task_state(task);
vsize = eip = esp = 0;
- permitted = ptrace_may_access(task, PTRACE_MODE_READ | PTRACE_MODE_NOAUDIT);
+ permitted = ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS | PTRACE_MODE_NOAUDIT);
mm = get_task_mm(task);
if (mm) {
vsize = task_vsize(mm);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 4bd5d3118acd..4f764c2ac1a5 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -403,7 +403,7 @@ static const struct file_operations proc_pid_cmdline_ops = {
static int proc_pid_auxv(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
- struct mm_struct *mm = mm_access(task, PTRACE_MODE_READ);
+ struct mm_struct *mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
if (mm && !IS_ERR(mm)) {
unsigned int nwords = 0;
do {
@@ -430,7 +430,8 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
wchan = get_wchan(task);
- if (wchan && ptrace_may_access(task, PTRACE_MODE_READ) && !lookup_symbol_name(wchan, symname))
+ if (wchan && ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)
+ && !lookup_symbol_name(wchan, symname))
seq_printf(m, "%s", symname);
else
seq_putc(m, '0');
@@ -444,7 +445,7 @@ static int lock_trace(struct task_struct *task)
int err = mutex_lock_killable(&task->signal->cred_guard_mutex);
if (err)
return err;
- if (!ptrace_may_access(task, PTRACE_MODE_ATTACH)) {
+ if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) {
mutex_unlock(&task->signal->cred_guard_mutex);
return -EPERM;
}
@@ -697,7 +698,7 @@ static int proc_fd_access_allowed(struct inode *inode)
*/
task = get_proc_task(inode);
if (task) {
- allowed = ptrace_may_access(task, PTRACE_MODE_READ);
+ allowed = ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
put_task_struct(task);
}
return allowed;
@@ -732,7 +733,7 @@ static bool has_pid_permissions(struct pid_namespace *pid,
return true;
if (in_group_p(pid->pid_gid))
return true;
- return ptrace_may_access(task, PTRACE_MODE_READ);
+ return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
}
@@ -809,7 +810,7 @@ struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode)
struct mm_struct *mm = ERR_PTR(-ESRCH);
if (task) {
- mm = mm_access(task, mode);
+ mm = mm_access(task, mode | PTRACE_MODE_FSCREDS);
put_task_struct(task);
if (!IS_ERR_OR_NULL(mm)) {
@@ -952,6 +953,7 @@ static ssize_t environ_read(struct file *file, char __user *buf,
unsigned long src = *ppos;
int ret = 0;
struct mm_struct *mm = file->private_data;
+ unsigned long env_start, env_end;
if (!mm)
return 0;
@@ -963,19 +965,25 @@ static ssize_t environ_read(struct file *file, char __user *buf,
ret = 0;
if (!atomic_inc_not_zero(&mm->mm_users))
goto free;
+
+ down_read(&mm->mmap_sem);
+ env_start = mm->env_start;
+ env_end = mm->env_end;
+ up_read(&mm->mmap_sem);
+
while (count > 0) {
size_t this_len, max_len;
int retval;
- if (src >= (mm->env_end - mm->env_start))
+ if (src >= (env_end - env_start))
break;
- this_len = mm->env_end - (mm->env_start + src);
+ this_len = env_end - (env_start + src);
max_len = min_t(size_t, PAGE_SIZE, count);
this_len = min(max_len, this_len);
- retval = access_remote_vm(mm, (mm->env_start + src),
+ retval = access_remote_vm(mm, (env_start + src),
page, this_len, 0);
if (retval <= 0) {
@@ -1564,12 +1572,16 @@ static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
return -ENOENT;
}
-static const char *proc_pid_follow_link(struct dentry *dentry, void **cookie)
+static const char *proc_pid_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct inode *inode = d_inode(dentry);
struct path path;
int error = -EACCES;
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
/* Are we allowed to snoop on the tasks file descriptors? */
if (!proc_fd_access_allowed(inode))
goto out;
@@ -1630,7 +1642,7 @@ out:
const struct inode_operations proc_pid_link_inode_operations = {
.readlink = proc_pid_readlink,
- .follow_link = proc_pid_follow_link,
+ .get_link = proc_pid_get_link,
.setattr = proc_setattr,
};
@@ -1856,7 +1868,7 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
if (!task)
goto out_notask;
- mm = mm_access(task, PTRACE_MODE_READ);
+ mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
if (IS_ERR_OR_NULL(mm))
goto out;
@@ -1895,7 +1907,7 @@ static const struct dentry_operations tid_map_files_dentry_operations = {
.d_delete = pid_delete_dentry,
};
-static int proc_map_files_get_link(struct dentry *dentry, struct path *path)
+static int map_files_get_link(struct dentry *dentry, struct path *path)
{
unsigned long vm_start, vm_end;
struct vm_area_struct *vma;
@@ -1945,20 +1957,22 @@ struct map_files_info {
* path to the file in question.
*/
static const char *
-proc_map_files_follow_link(struct dentry *dentry, void **cookie)
+proc_map_files_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
if (!capable(CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
- return proc_pid_follow_link(dentry, NULL);
+ return proc_pid_get_link(dentry, inode, done);
}
/*
- * Identical to proc_pid_link_inode_operations except for follow_link()
+ * Identical to proc_pid_link_inode_operations except for get_link()
*/
static const struct inode_operations proc_map_files_link_inode_operations = {
.readlink = proc_pid_readlink,
- .follow_link = proc_map_files_follow_link,
+ .get_link = proc_map_files_get_link,
.setattr = proc_setattr,
};
@@ -1975,7 +1989,7 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
return -ENOENT;
ei = PROC_I(inode);
- ei->op.proc_get_link = proc_map_files_get_link;
+ ei->op.proc_get_link = map_files_get_link;
inode->i_op = &proc_map_files_link_inode_operations;
inode->i_size = 64;
@@ -2007,7 +2021,7 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
goto out;
result = -EACCES;
- if (!ptrace_may_access(task, PTRACE_MODE_READ))
+ if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
goto out_put_task;
result = -ENOENT;
@@ -2060,7 +2074,7 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx)
goto out;
ret = -EACCES;
- if (!ptrace_may_access(task, PTRACE_MODE_READ))
+ if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
goto out_put_task;
ret = 0;
@@ -2359,7 +2373,7 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
size_t count, loff_t *ppos)
{
struct inode * inode = file_inode(file);
- char *page;
+ void *page;
ssize_t length;
struct task_struct *task = get_proc_task(inode);
@@ -2374,14 +2388,11 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
if (*ppos != 0)
goto out;
- length = -ENOMEM;
- page = (char*)__get_free_page(GFP_TEMPORARY);
- if (!page)
+ page = memdup_user(buf, count);
+ if (IS_ERR(page)) {
+ length = PTR_ERR(page);
goto out;
-
- length = -EFAULT;
- if (copy_from_user(page, buf, count))
- goto out_free;
+ }
/* Guard against adverse ptrace interaction */
length = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
@@ -2390,10 +2401,10 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
length = security_setprocattr(task,
(char*)file->f_path.dentry->d_name.name,
- (void*)page, count);
+ page, count);
mutex_unlock(&task->signal->cred_guard_mutex);
out_free:
- free_page((unsigned long) page);
+ kfree(page);
out:
put_task_struct(task);
out_no_task:
@@ -2530,7 +2541,7 @@ static int do_io_accounting(struct task_struct *task, struct seq_file *m, int wh
if (result)
return result;
- if (!ptrace_may_access(task, PTRACE_MODE_READ)) {
+ if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
result = -EACCES;
goto out_unlock;
}
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 3c2a915c695a..56afa5ef08f2 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -258,6 +258,7 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx,
name, len, instantiate, p,
(void *)(unsigned long)fd))
goto out_fd_loop;
+ cond_resched();
rcu_read_lock();
}
rcu_read_unlock();
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index bd95b9fdebb0..42305ddcbaa0 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -95,7 +95,8 @@ void __init proc_init_inodecache(void)
proc_inode_cachep = kmem_cache_create("proc_inode_cache",
sizeof(struct proc_inode),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_PANIC),
+ SLAB_MEM_SPREAD|SLAB_ACCOUNT|
+ SLAB_PANIC),
init_once);
}
@@ -393,24 +394,25 @@ static const struct file_operations proc_reg_file_ops_no_compat = {
};
#endif
-static const char *proc_follow_link(struct dentry *dentry, void **cookie)
+static void proc_put_link(void *p)
{
- struct proc_dir_entry *pde = PDE(d_inode(dentry));
- if (unlikely(!use_pde(pde)))
- return ERR_PTR(-EINVAL);
- *cookie = pde;
- return pde->data;
+ unuse_pde(p);
}
-static void proc_put_link(struct inode *unused, void *p)
+static const char *proc_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- unuse_pde(p);
+ struct proc_dir_entry *pde = PDE(inode);
+ if (unlikely(!use_pde(pde)))
+ return ERR_PTR(-EINVAL);
+ set_delayed_call(done, proc_put_link, pde);
+ return pde->data;
}
const struct inode_operations proc_link_inode_operations = {
.readlink = generic_readlink,
- .follow_link = proc_follow_link,
- .put_link = proc_put_link,
+ .get_link = proc_get_link,
};
struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 92e6726f6e37..a939f5ed7f89 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -552,9 +552,9 @@ static int open_kcore(struct inode *inode, struct file *filp)
if (kcore_need_update)
kcore_update_ram();
if (i_size_read(inode) != proc_root_kcore->size) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
i_size_write(inode, proc_root_kcore->size);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
return 0;
}
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 9155a5a0d3b9..df4661abadc4 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -57,11 +57,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
/*
* Estimate the amount of memory available for userspace allocations,
* without causing swapping.
- *
- * Free memory cannot be taken below the low watermark, before the
- * system starts swapping.
*/
- available = i.freeram - wmark_low;
+ available = i.freeram - totalreserve_pages;
/*
* Not all the page cache can be freed, otherwise the system will
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index f6e8354b8cea..276f12431dbf 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -30,19 +30,23 @@ static const struct proc_ns_operations *ns_entries[] = {
&mntns_operations,
};
-static const char *proc_ns_follow_link(struct dentry *dentry, void **cookie)
+static const char *proc_ns_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct inode *inode = d_inode(dentry);
const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops;
struct task_struct *task;
struct path ns_path;
void *error = ERR_PTR(-EACCES);
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
task = get_proc_task(inode);
if (!task)
return error;
- if (ptrace_may_access(task, PTRACE_MODE_READ)) {
+ if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
error = ns_get_path(&ns_path, task, ns_ops);
if (!error)
nd_jump_link(&ns_path);
@@ -63,7 +67,7 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl
if (!task)
return res;
- if (ptrace_may_access(task, PTRACE_MODE_READ)) {
+ if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
res = ns_get_name(name, sizeof(name), task, ns_ops);
if (res >= 0)
res = readlink_copy(buffer, buflen, name);
@@ -74,7 +78,7 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl
static const struct inode_operations proc_ns_link_inode_operations = {
.readlink = proc_ns_readlink,
- .follow_link = proc_ns_follow_link,
+ .get_link = proc_ns_get_link,
.setattr = proc_setattr,
};
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 93484034a03d..b2855eea5405 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -103,9 +103,9 @@ u64 stable_page_flags(struct page *page)
* pseudo flags for the well known (anonymous) memory mapped pages
*
* Note that page->_mapcount is overloaded in SLOB/SLUB/SLQB, so the
- * simple test in page_mapped() is not enough.
+ * simple test in page_mapcount() is not enough.
*/
- if (!PageSlab(page) && page_mapped(page))
+ if (!PageSlab(page) && page_mapcount(page))
u |= 1 << KPF_MMAP;
if (PageAnon(page))
u |= 1 << KPF_ANON;
diff --git a/fs/proc/self.c b/fs/proc/self.c
index 113b8d061fc0..b6a8d3529fea 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -18,26 +18,28 @@ static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
return readlink_copy(buffer, buflen, tmp);
}
-static const char *proc_self_follow_link(struct dentry *dentry, void **cookie)
+static const char *proc_self_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct pid_namespace *ns = dentry->d_sb->s_fs_info;
+ struct pid_namespace *ns = inode->i_sb->s_fs_info;
pid_t tgid = task_tgid_nr_ns(current, ns);
char *name;
if (!tgid)
return ERR_PTR(-ENOENT);
/* 11 for max length of signed int in decimal + NULL term */
- name = kmalloc(12, GFP_KERNEL);
- if (!name)
- return ERR_PTR(-ENOMEM);
+ name = kmalloc(12, dentry ? GFP_KERNEL : GFP_ATOMIC);
+ if (unlikely(!name))
+ return dentry ? ERR_PTR(-ENOMEM) : ERR_PTR(-ECHILD);
sprintf(name, "%d", tgid);
- return *cookie = name;
+ set_delayed_call(done, kfree_link, name);
+ return name;
}
static const struct inode_operations proc_self_inode_operations = {
.readlink = proc_self_readlink,
- .follow_link = proc_self_follow_link,
- .put_link = kfree_put_link,
+ .get_link = proc_self_get_link,
};
static unsigned self_inum;
@@ -48,7 +50,7 @@ int proc_setup_self(struct super_block *s)
struct pid_namespace *ns = s->s_fs_info;
struct dentry *self;
- mutex_lock(&root_inode->i_mutex);
+ inode_lock(root_inode);
self = d_alloc_name(s->s_root, "self");
if (self) {
struct inode *inode = new_inode_pseudo(s);
@@ -67,7 +69,7 @@ int proc_setup_self(struct super_block *s)
} else {
self = ERR_PTR(-ENOMEM);
}
- mutex_unlock(&root_inode->i_mutex);
+ inode_unlock(root_inode);
if (IS_ERR(self)) {
pr_err("proc_fill_super: can't allocate /proc/self\n");
return PTR_ERR(self);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 187b3b5f242e..fa95ab2d3674 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -14,6 +14,7 @@
#include <linux/swapops.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
+#include <linux/shmem_fs.h>
#include <asm/elf.h>
#include <asm/uaccess.h>
@@ -22,9 +23,13 @@
void task_mem(struct seq_file *m, struct mm_struct *mm)
{
- unsigned long data, text, lib, swap, ptes, pmds;
+ unsigned long text, lib, swap, ptes, pmds, anon, file, shmem;
unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;
+ anon = get_mm_counter(mm, MM_ANONPAGES);
+ file = get_mm_counter(mm, MM_FILEPAGES);
+ shmem = get_mm_counter(mm, MM_SHMEMPAGES);
+
/*
* Note: to minimize their overhead, mm maintains hiwater_vm and
* hiwater_rss only when about to *lower* total_vm or rss. Any
@@ -35,11 +40,10 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
hiwater_vm = total_vm = mm->total_vm;
if (hiwater_vm < mm->hiwater_vm)
hiwater_vm = mm->hiwater_vm;
- hiwater_rss = total_rss = get_mm_rss(mm);
+ hiwater_rss = total_rss = anon + file + shmem;
if (hiwater_rss < mm->hiwater_rss)
hiwater_rss = mm->hiwater_rss;
- data = mm->total_vm - mm->shared_vm - mm->stack_vm;
text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
swap = get_mm_counter(mm, MM_SWAPENTS);
@@ -52,6 +56,9 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
"VmPin:\t%8lu kB\n"
"VmHWM:\t%8lu kB\n"
"VmRSS:\t%8lu kB\n"
+ "RssAnon:\t%8lu kB\n"
+ "RssFile:\t%8lu kB\n"
+ "RssShmem:\t%8lu kB\n"
"VmData:\t%8lu kB\n"
"VmStk:\t%8lu kB\n"
"VmExe:\t%8lu kB\n"
@@ -65,7 +72,10 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
mm->pinned_vm << (PAGE_SHIFT-10),
hiwater_rss << (PAGE_SHIFT-10),
total_rss << (PAGE_SHIFT-10),
- data << (PAGE_SHIFT-10),
+ anon << (PAGE_SHIFT-10),
+ file << (PAGE_SHIFT-10),
+ shmem << (PAGE_SHIFT-10),
+ mm->data_vm << (PAGE_SHIFT-10),
mm->stack_vm << (PAGE_SHIFT-10), text, lib,
ptes >> 10,
pmds >> 10,
@@ -82,10 +92,11 @@ unsigned long task_statm(struct mm_struct *mm,
unsigned long *shared, unsigned long *text,
unsigned long *data, unsigned long *resident)
{
- *shared = get_mm_counter(mm, MM_FILEPAGES);
+ *shared = get_mm_counter(mm, MM_FILEPAGES) +
+ get_mm_counter(mm, MM_SHMEMPAGES);
*text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
>> PAGE_SHIFT;
- *data = mm->total_vm - mm->shared_vm;
+ *data = mm->data_vm + mm->stack_vm;
*resident = *shared + get_mm_counter(mm, MM_ANONPAGES);
return mm->total_vm;
}
@@ -248,23 +259,29 @@ static int do_maps_open(struct inode *inode, struct file *file,
sizeof(struct proc_maps_private));
}
-static pid_t pid_of_stack(struct proc_maps_private *priv,
- struct vm_area_struct *vma, bool is_pid)
+/*
+ * Indicate if the VMA is a stack for the given task; for
+ * /proc/PID/maps that is the stack of the main task.
+ */
+static int is_stack(struct proc_maps_private *priv,
+ struct vm_area_struct *vma, int is_pid)
{
- struct inode *inode = priv->inode;
- struct task_struct *task;
- pid_t ret = 0;
+ int stack = 0;
- rcu_read_lock();
- task = pid_task(proc_pid(inode), PIDTYPE_PID);
- if (task) {
- task = task_of_stack(task, vma, is_pid);
+ if (is_pid) {
+ stack = vma->vm_start <= vma->vm_mm->start_stack &&
+ vma->vm_end >= vma->vm_mm->start_stack;
+ } else {
+ struct inode *inode = priv->inode;
+ struct task_struct *task;
+
+ rcu_read_lock();
+ task = pid_task(proc_pid(inode), PIDTYPE_PID);
if (task)
- ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info);
+ stack = vma_is_stack_for_task(vma, task);
+ rcu_read_unlock();
}
- rcu_read_unlock();
-
- return ret;
+ return stack;
}
static void
@@ -324,8 +341,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
name = arch_vma_name(vma);
if (!name) {
- pid_t tid;
-
if (!mm) {
name = "[vdso]";
goto done;
@@ -337,21 +352,8 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
goto done;
}
- tid = pid_of_stack(priv, vma, is_pid);
- if (tid != 0) {
- /*
- * Thread stack in /proc/PID/task/TID/maps or
- * the main process stack.
- */
- if (!is_pid || (vma->vm_start <= mm->start_stack &&
- vma->vm_end >= mm->start_stack)) {
- name = "[stack]";
- } else {
- /* Thread stack in /proc/PID/maps */
- seq_pad(m, ' ');
- seq_printf(m, "[stack:%d]", tid);
- }
- }
+ if (is_stack(priv, vma, is_pid))
+ name = "[stack]";
}
done:
@@ -451,12 +453,14 @@ struct mem_size_stats {
unsigned long private_hugetlb;
u64 pss;
u64 swap_pss;
+ bool check_shmem_swap;
};
static void smaps_account(struct mem_size_stats *mss, struct page *page,
- unsigned long size, bool young, bool dirty)
+ bool compound, bool young, bool dirty)
{
- int mapcount;
+ int i, nr = compound ? 1 << compound_order(page) : 1;
+ unsigned long size = nr * PAGE_SIZE;
if (PageAnon(page))
mss->anonymous += size;
@@ -465,26 +469,53 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
/* Accumulate the size in pages that have been accessed. */
if (young || page_is_young(page) || PageReferenced(page))
mss->referenced += size;
- mapcount = page_mapcount(page);
- if (mapcount >= 2) {
- u64 pss_delta;
- if (dirty || PageDirty(page))
- mss->shared_dirty += size;
- else
- mss->shared_clean += size;
- pss_delta = (u64)size << PSS_SHIFT;
- do_div(pss_delta, mapcount);
- mss->pss += pss_delta;
- } else {
+ /*
+ * page_count(page) == 1 guarantees the page is mapped exactly once.
+ * If any subpage of the compound page mapped with PTE it would elevate
+ * page_count().
+ */
+ if (page_count(page) == 1) {
if (dirty || PageDirty(page))
mss->private_dirty += size;
else
mss->private_clean += size;
mss->pss += (u64)size << PSS_SHIFT;
+ return;
+ }
+
+ for (i = 0; i < nr; i++, page++) {
+ int mapcount = page_mapcount(page);
+
+ if (mapcount >= 2) {
+ if (dirty || PageDirty(page))
+ mss->shared_dirty += PAGE_SIZE;
+ else
+ mss->shared_clean += PAGE_SIZE;
+ mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;
+ } else {
+ if (dirty || PageDirty(page))
+ mss->private_dirty += PAGE_SIZE;
+ else
+ mss->private_clean += PAGE_SIZE;
+ mss->pss += PAGE_SIZE << PSS_SHIFT;
+ }
}
}
+#ifdef CONFIG_SHMEM
+static int smaps_pte_hole(unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
+{
+ struct mem_size_stats *mss = walk->private;
+
+ mss->swap += shmem_partial_swap_usage(
+ walk->vma->vm_file->f_mapping, addr, end);
+
+ return 0;
+}
+#endif
+
static void smaps_pte_entry(pte_t *pte, unsigned long addr,
struct mm_walk *walk)
{
@@ -512,11 +543,25 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
}
} else if (is_migration_entry(swpent))
page = migration_entry_to_page(swpent);
+ } else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap
+ && pte_none(*pte))) {
+ page = find_get_entry(vma->vm_file->f_mapping,
+ linear_page_index(vma, addr));
+ if (!page)
+ return;
+
+ if (radix_tree_exceptional_entry(page))
+ mss->swap += PAGE_SIZE;
+ else
+ page_cache_release(page);
+
+ return;
}
if (!page)
return;
- smaps_account(mss, page, PAGE_SIZE, pte_young(*pte), pte_dirty(*pte));
+
+ smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte));
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -532,8 +577,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
if (IS_ERR_OR_NULL(page))
return;
mss->anonymous_thp += HPAGE_PMD_SIZE;
- smaps_account(mss, page, HPAGE_PMD_SIZE,
- pmd_young(*pmd), pmd_dirty(*pmd));
+ smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd));
}
#else
static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
@@ -549,7 +593,8 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
pte_t *pte;
spinlock_t *ptl;
- if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
+ ptl = pmd_trans_huge_lock(pmd, vma);
+ if (ptl) {
smaps_pmd_entry(pmd, addr, walk);
spin_unlock(ptl);
return 0;
@@ -671,6 +716,31 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
};
memset(&mss, 0, sizeof mss);
+
+#ifdef CONFIG_SHMEM
+ if (vma->vm_file && shmem_mapping(vma->vm_file->f_mapping)) {
+ /*
+ * For shared or readonly shmem mappings we know that all
+ * swapped out pages belong to the shmem object, and we can
+ * obtain the swap value much more efficiently. For private
+ * writable mappings, we might have COW pages that are
+ * not affected by the parent swapped out pages of the shmem
+ * object, so we have to distinguish them during the page walk.
+ * Unless we know that the shmem object (or the part mapped by
+ * our VMA) has no swapped out pages at all.
+ */
+ unsigned long shmem_swapped = shmem_swap_usage(vma);
+
+ if (!shmem_swapped || (vma->vm_flags & VM_SHARED) ||
+ !(vma->vm_flags & VM_WRITE)) {
+ mss.swap = shmem_swapped;
+ } else {
+ mss.check_shmem_swap = true;
+ smaps_walk.pte_hole = smaps_pte_hole;
+ }
+ }
+#endif
+
/* mmap_sem is held in m_start */
walk_page_vma(vma, &smaps_walk);
@@ -817,9 +887,6 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
pmd = pmd_wrprotect(pmd);
pmd = pmd_clear_soft_dirty(pmd);
- if (vma->vm_flags & VM_SOFTDIRTY)
- vma->vm_flags &= ~VM_SOFTDIRTY;
-
set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
}
#else
@@ -838,7 +905,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
spinlock_t *ptl;
struct page *page;
- if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
+ ptl = pmd_trans_huge_lock(pmd, vma);
+ if (ptl) {
if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
clear_soft_dirty_pmd(vma, addr, pmd);
goto out;
@@ -1112,7 +1180,8 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
int err = 0;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- if (pmd_trans_huge_lock(pmdp, vma, &ptl) == 1) {
+ ptl = pmd_trans_huge_lock(pmdp, vma);
+ if (ptl) {
u64 flags = 0, frame = 0;
pmd_t pmd = *pmdp;
@@ -1444,7 +1513,8 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
pte_t *orig_pte;
pte_t *pte;
- if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
+ ptl = pmd_trans_huge_lock(pmd, vma);
+ if (ptl) {
pte_t huge_pte = *(pte_t *)pmd;
struct page *page;
@@ -1473,18 +1543,19 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
unsigned long addr, unsigned long end, struct mm_walk *walk)
{
+ pte_t huge_pte = huge_ptep_get(pte);
struct numa_maps *md;
struct page *page;
- if (!pte_present(*pte))
+ if (!pte_present(huge_pte))
return 0;
- page = pte_page(*pte);
+ page = pte_page(huge_pte);
if (!page)
return 0;
md = walk->private;
- gather_stats(page, md, pte_dirty(*pte), 1);
+ gather_stats(page, md, pte_dirty(huge_pte), 1);
return 0;
}
@@ -1538,19 +1609,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
seq_file_path(m, file, "\n\t= ");
} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
seq_puts(m, " heap");
- } else {
- pid_t tid = pid_of_stack(proc_priv, vma, is_pid);
- if (tid != 0) {
- /*
- * Thread stack in /proc/PID/task/TID/maps or
- * the main process stack.
- */
- if (!is_pid || (vma->vm_start <= mm->start_stack &&
- vma->vm_end >= mm->start_stack))
- seq_puts(m, " stack");
- else
- seq_printf(m, " stack:%d", tid);
- }
+ } else if (is_stack(proc_priv, vma, is_pid)) {
+ seq_puts(m, " stack");
}
if (is_vm_hugetlb_page(vma))
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index e0d64c92e4f6..faacb0c0d857 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -123,23 +123,26 @@ unsigned long task_statm(struct mm_struct *mm,
return size;
}
-static pid_t pid_of_stack(struct proc_maps_private *priv,
- struct vm_area_struct *vma, bool is_pid)
+static int is_stack(struct proc_maps_private *priv,
+ struct vm_area_struct *vma, int is_pid)
{
- struct inode *inode = priv->inode;
- struct task_struct *task;
- pid_t ret = 0;
-
- rcu_read_lock();
- task = pid_task(proc_pid(inode), PIDTYPE_PID);
- if (task) {
- task = task_of_stack(task, vma, is_pid);
+ struct mm_struct *mm = vma->vm_mm;
+ int stack = 0;
+
+ if (is_pid) {
+ stack = vma->vm_start <= mm->start_stack &&
+ vma->vm_end >= mm->start_stack;
+ } else {
+ struct inode *inode = priv->inode;
+ struct task_struct *task;
+
+ rcu_read_lock();
+ task = pid_task(proc_pid(inode), PIDTYPE_PID);
if (task)
- ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info);
+ stack = vma_is_stack_for_task(vma, task);
+ rcu_read_unlock();
}
- rcu_read_unlock();
-
- return ret;
+ return stack;
}
/*
@@ -181,21 +184,9 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
if (file) {
seq_pad(m, ' ');
seq_file_path(m, file, "");
- } else if (mm) {
- pid_t tid = pid_of_stack(priv, vma, is_pid);
-
- if (tid != 0) {
- seq_pad(m, ' ');
- /*
- * Thread stack in /proc/PID/task/TID/maps or
- * the main process stack.
- */
- if (!is_pid || (vma->vm_start <= mm->start_stack &&
- vma->vm_end >= mm->start_stack))
- seq_printf(m, "[stack]");
- else
- seq_printf(m, "[stack:%d]", tid);
- }
+ } else if (mm && is_stack(priv, vma, is_pid)) {
+ seq_pad(m, ' ');
+ seq_printf(m, "[stack]");
}
seq_putc(m, '\n');
diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c
index 947b0f4fd0a1..e58a31e8fb2a 100644
--- a/fs/proc/thread_self.c
+++ b/fs/proc/thread_self.c
@@ -19,26 +19,29 @@ static int proc_thread_self_readlink(struct dentry *dentry, char __user *buffer,
return readlink_copy(buffer, buflen, tmp);
}
-static const char *proc_thread_self_follow_link(struct dentry *dentry, void **cookie)
+static const char *proc_thread_self_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct pid_namespace *ns = dentry->d_sb->s_fs_info;
+ struct pid_namespace *ns = inode->i_sb->s_fs_info;
pid_t tgid = task_tgid_nr_ns(current, ns);
pid_t pid = task_pid_nr_ns(current, ns);
char *name;
if (!pid)
return ERR_PTR(-ENOENT);
- name = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF, GFP_KERNEL);
- if (!name)
- return ERR_PTR(-ENOMEM);
+ name = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF,
+ dentry ? GFP_KERNEL : GFP_ATOMIC);
+ if (unlikely(!name))
+ return dentry ? ERR_PTR(-ENOMEM) : ERR_PTR(-ECHILD);
sprintf(name, "%d/task/%d", tgid, pid);
- return *cookie = name;
+ set_delayed_call(done, kfree_link, name);
+ return name;
}
static const struct inode_operations proc_thread_self_inode_operations = {
.readlink = proc_thread_self_readlink,
- .follow_link = proc_thread_self_follow_link,
- .put_link = kfree_put_link,
+ .get_link = proc_thread_self_get_link,
};
static unsigned thread_self_inum;
@@ -49,7 +52,7 @@ int proc_setup_thread_self(struct super_block *s)
struct pid_namespace *ns = s->s_fs_info;
struct dentry *thread_self;
- mutex_lock(&root_inode->i_mutex);
+ inode_lock(root_inode);
thread_self = d_alloc_name(s->s_root, "thread-self");
if (thread_self) {
struct inode *inode = new_inode_pseudo(s);
@@ -68,7 +71,7 @@ int proc_setup_thread_self(struct super_block *s)
} else {
thread_self = ERR_PTR(-ENOMEM);
}
- mutex_unlock(&root_inode->i_mutex);
+ inode_unlock(root_inode);
if (IS_ERR(thread_self)) {
pr_err("proc_fill_super: can't allocate /proc/thread_self\n");
return PTR_ERR(thread_self);