summaryrefslogtreecommitdiffstats
path: root/fs/proc
diff options
context:
space:
mode:
Diffstat (limited to 'fs/proc')
-rw-r--r--fs/proc/base.c122
-rw-r--r--fs/proc/generic.c15
-rw-r--r--fs/proc/inode.c1
-rw-r--r--fs/proc/internal.h6
-rw-r--r--fs/proc/page.c8
-rw-r--r--fs/proc/proc_sysctl.c8
-rw-r--r--fs/proc/root.c2
-rw-r--r--fs/proc/stat.c14
-rw-r--r--fs/proc/task_mmu.c55
9 files changed, 88 insertions, 143 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c
index d295af993677..144a96732dd7 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -873,111 +873,6 @@ static const struct file_operations proc_environ_operations = {
.release = mem_release,
};
-static ssize_t oom_adjust_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
- char buffer[PROC_NUMBUF];
- size_t len;
- int oom_adjust = OOM_DISABLE;
- unsigned long flags;
-
- if (!task)
- return -ESRCH;
-
- if (lock_task_sighand(task, &flags)) {
- oom_adjust = task->signal->oom_adj;
- unlock_task_sighand(task, &flags);
- }
-
- put_task_struct(task);
-
- len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
-
- return simple_read_from_buffer(buf, count, ppos, buffer, len);
-}
-
-static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct task_struct *task;
- char buffer[PROC_NUMBUF];
- int oom_adjust;
- unsigned long flags;
- int err;
-
- memset(buffer, 0, sizeof(buffer));
- if (count > sizeof(buffer) - 1)
- count = sizeof(buffer) - 1;
- if (copy_from_user(buffer, buf, count)) {
- err = -EFAULT;
- goto out;
- }
-
- err = kstrtoint(strstrip(buffer), 0, &oom_adjust);
- if (err)
- goto out;
- if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) &&
- oom_adjust != OOM_DISABLE) {
- err = -EINVAL;
- goto out;
- }
-
- task = get_proc_task(file->f_path.dentry->d_inode);
- if (!task) {
- err = -ESRCH;
- goto out;
- }
-
- task_lock(task);
- if (!task->mm) {
- err = -EINVAL;
- goto err_task_lock;
- }
-
- if (!lock_task_sighand(task, &flags)) {
- err = -ESRCH;
- goto err_task_lock;
- }
-
- if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) {
- err = -EACCES;
- goto err_sighand;
- }
-
- /*
- * Warn that /proc/pid/oom_adj is deprecated, see
- * Documentation/feature-removal-schedule.txt.
- */
- printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
- current->comm, task_pid_nr(current), task_pid_nr(task),
- task_pid_nr(task));
- task->signal->oom_adj = oom_adjust;
- /*
- * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
- * value is always attainable.
- */
- if (task->signal->oom_adj == OOM_ADJUST_MAX)
- task->signal->oom_score_adj = OOM_SCORE_ADJ_MAX;
- else
- task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) /
- -OOM_DISABLE;
- trace_oom_score_adj_update(task);
-err_sighand:
- unlock_task_sighand(task, &flags);
-err_task_lock:
- task_unlock(task);
- put_task_struct(task);
-out:
- return err < 0 ? err : count;
-}
-
-static const struct file_operations proc_oom_adjust_operations = {
- .read = oom_adjust_read,
- .write = oom_adjust_write,
- .llseek = generic_file_llseek,
-};
-
static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
@@ -1051,15 +946,7 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
if (has_capability_noaudit(current, CAP_SYS_RESOURCE))
task->signal->oom_score_adj_min = oom_score_adj;
trace_oom_score_adj_update(task);
- /*
- * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is
- * always attainable.
- */
- if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
- task->signal->oom_adj = OOM_DISABLE;
- else
- task->signal->oom_adj = (oom_score_adj * OOM_ADJUST_MAX) /
- OOM_SCORE_ADJ_MAX;
+
err_sighand:
unlock_task_sighand(task, &flags);
err_task_lock:
@@ -2371,7 +2258,8 @@ static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
pid_t tgid = task_tgid_nr_ns(current, ns);
char *name = ERR_PTR(-ENOENT);
if (tgid) {
- name = __getname();
+ /* 11 for max length of signed int in decimal + NULL term */
+ name = kmalloc(12, GFP_KERNEL);
if (!name)
name = ERR_PTR(-ENOMEM);
else
@@ -2386,7 +2274,7 @@ static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd,
{
char *s = nd_get_link(nd);
if (!IS_ERR(s))
- __putname(s);
+ kfree(s);
}
static const struct inode_operations proc_self_inode_operations = {
@@ -2710,7 +2598,6 @@ static const struct pid_entry tgid_base_stuff[] = {
REG("cgroup", S_IRUGO, proc_cgroup_operations),
#endif
INF("oom_score", S_IRUGO, proc_oom_score),
- REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations),
REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
#ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
@@ -3077,7 +2964,6 @@ static const struct pid_entry tid_base_stuff[] = {
REG("cgroup", S_IRUGO, proc_cgroup_operations),
#endif
INF("oom_score", S_IRUGO, proc_oom_score),
- REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations),
REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
#ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index b3647fe6a608..0d80cef4cfb9 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -427,7 +427,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
pde_get(de);
spin_unlock(&proc_subdir_lock);
- error = -EINVAL;
+ error = -ENOMEM;
inode = proc_get_inode(dir->i_sb, de);
goto out_unlock;
}
@@ -605,7 +605,8 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
unsigned int len;
/* make sure name is valid */
- if (!name || !strlen(name)) goto out;
+ if (!name || !strlen(name))
+ goto out;
if (xlate_proc_name(name, parent, &fn) != 0)
goto out;
@@ -616,20 +617,18 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
len = strlen(fn);
- ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL);
- if (!ent) goto out;
+ ent = kzalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL);
+ if (!ent)
+ goto out;
- memset(ent, 0, sizeof(struct proc_dir_entry));
memcpy(ent->name, fn, len + 1);
ent->namelen = len;
ent->mode = mode;
ent->nlink = nlink;
atomic_set(&ent->count, 1);
- ent->pde_users = 0;
spin_lock_init(&ent->pde_unload_lock);
- ent->pde_unload_completion = NULL;
INIT_LIST_HEAD(&ent->pde_openers);
- out:
+out:
return ent;
}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 7ac817b64a71..3b22bbdee9ec 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -450,7 +450,6 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
return NULL;
if (inode->i_state & I_NEW) {
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
- PROC_I(inode)->fd = 0;
PROC_I(inode)->pde = de;
if (de->mode) {
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 67925a7bd8cb..43973b084abf 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -12,6 +12,7 @@
#include <linux/sched.h>
#include <linux/proc_fs.h>
struct ctl_table_header;
+struct mempolicy;
extern struct proc_dir_entry proc_root;
#ifdef CONFIG_PROC_SYSCTL
@@ -74,6 +75,9 @@ struct proc_maps_private {
#ifdef CONFIG_MMU
struct vm_area_struct *tail_vma;
#endif
+#ifdef CONFIG_NUMA
+ struct mempolicy *task_mempolicy;
+#endif
};
void proc_init_inodecache(void);
@@ -103,7 +107,7 @@ static inline int task_dumpable(struct task_struct *task)
if (mm)
dumpable = get_dumpable(mm);
task_unlock(task);
- if(dumpable == 1)
+ if (dumpable == SUID_DUMPABLE_ENABLED)
return 1;
return 0;
}
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 7fcd0d60a968..b8730d9ebaee 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -115,7 +115,13 @@ u64 stable_page_flags(struct page *page)
u |= 1 << KPF_COMPOUND_TAIL;
if (PageHuge(page))
u |= 1 << KPF_HUGE;
- else if (PageTransCompound(page))
+ /*
+ * PageTransCompound can be true for non-huge compound pages (slab
+ * pages or pages allocated by drivers with __GFP_COMP) because it
+ * just checks PG_head/PG_tail, so we need to check PageLRU to make
+ * sure a given page is a thp, not a non-huge compound page.
+ */
+ else if (PageTransCompound(page) && PageLRU(compound_trans_head(page)))
u |= 1 << KPF_THP;
/*
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index eb7cc91b7258..a781bdf06694 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -142,6 +142,7 @@ static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry)
}
rb_link_node(node, parent, p);
+ rb_insert_color(node, &head->parent->root);
return 0;
}
@@ -168,10 +169,8 @@ static void init_header(struct ctl_table_header *head,
head->node = node;
if (node) {
struct ctl_table *entry;
- for (entry = table; entry->procname; entry++, node++) {
- rb_init_node(&node->node);
+ for (entry = table; entry->procname; entry++, node++)
node->header = head;
- }
}
}
@@ -266,8 +265,7 @@ void sysctl_head_put(struct ctl_table_header *head)
static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
{
- if (!head)
- BUG();
+ BUG_ON(!head);
spin_lock(&sysctl_lock);
if (!use_table(head))
head = ERR_PTR(-ENOENT);
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 9a2d9fd7cadd..9889a92d2e01 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -61,7 +61,7 @@ static int proc_parse_options(char *options, struct pid_namespace *pid)
if (!*p)
continue;
- args[0].to = args[0].from = 0;
+ args[0].to = args[0].from = NULL;
token = match_token(p, tokens, args);
switch (token) {
case Opt_gid:
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 64c3b3172367..e296572c73ed 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -45,10 +45,13 @@ static cputime64_t get_iowait_time(int cpu)
static u64 get_idle_time(int cpu)
{
- u64 idle, idle_time = get_cpu_idle_time_us(cpu, NULL);
+ u64 idle, idle_time = -1ULL;
+
+ if (cpu_online(cpu))
+ idle_time = get_cpu_idle_time_us(cpu, NULL);
if (idle_time == -1ULL)
- /* !NO_HZ so we can rely on cpustat.idle */
+ /* !NO_HZ or cpu offline so we can rely on cpustat.idle */
idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
else
idle = usecs_to_cputime64(idle_time);
@@ -58,10 +61,13 @@ static u64 get_idle_time(int cpu)
static u64 get_iowait_time(int cpu)
{
- u64 iowait, iowait_time = get_cpu_iowait_time_us(cpu, NULL);
+ u64 iowait, iowait_time = -1ULL;
+
+ if (cpu_online(cpu))
+ iowait_time = get_cpu_iowait_time_us(cpu, NULL);
if (iowait_time == -1ULL)
- /* !NO_HZ so we can rely on cpustat.iowait */
+ /* !NO_HZ or cpu offline so we can rely on cpustat.iowait */
iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
else
iowait = usecs_to_cputime64(iowait_time);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 4540b8f76f16..90c63f9392a5 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -54,7 +54,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
"VmPTE:\t%8lu kB\n"
"VmSwap:\t%8lu kB\n",
hiwater_vm << (PAGE_SHIFT-10),
- (total_vm - mm->reserved_vm) << (PAGE_SHIFT-10),
+ total_vm << (PAGE_SHIFT-10),
mm->locked_vm << (PAGE_SHIFT-10),
mm->pinned_vm << (PAGE_SHIFT-10),
hiwater_rss << (PAGE_SHIFT-10),
@@ -90,10 +90,55 @@ static void pad_len_spaces(struct seq_file *m, int len)
seq_printf(m, "%*c", len, ' ');
}
+#ifdef CONFIG_NUMA
+/*
+ * These functions are for numa_maps but called in generic **maps seq_file
+ * ->start(), ->stop() ops.
+ *
+ * numa_maps scans all vmas under mmap_sem and checks their mempolicy.
+ * Each mempolicy object is controlled by reference counting. The problem here
+ * is how to avoid accessing dead mempolicy object.
+ *
+ * Because we're holding mmap_sem while reading seq_file, it's safe to access
+ * each vma's mempolicy, no vma objects will never drop refs to mempolicy.
+ *
+ * A task's mempolicy (task->mempolicy) has different behavior. task->mempolicy
+ * is set and replaced under mmap_sem but unrefed and cleared under task_lock().
+ * So, without task_lock(), we cannot trust get_vma_policy() because we cannot
+ * gurantee the task never exits under us. But taking task_lock() around
+ * get_vma_plicy() causes lock order problem.
+ *
+ * To access task->mempolicy without lock, we hold a reference count of an
+ * object pointed by task->mempolicy and remember it. This will guarantee
+ * that task->mempolicy points to an alive object or NULL in numa_maps accesses.
+ */
+static void hold_task_mempolicy(struct proc_maps_private *priv)
+{
+ struct task_struct *task = priv->task;
+
+ task_lock(task);
+ priv->task_mempolicy = task->mempolicy;
+ mpol_get(priv->task_mempolicy);
+ task_unlock(task);
+}
+static void release_task_mempolicy(struct proc_maps_private *priv)
+{
+ mpol_put(priv->task_mempolicy);
+}
+#else
+static void hold_task_mempolicy(struct proc_maps_private *priv)
+{
+}
+static void release_task_mempolicy(struct proc_maps_private *priv)
+{
+}
+#endif
+
static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
{
if (vma && vma != priv->tail_vma) {
struct mm_struct *mm = vma->vm_mm;
+ release_task_mempolicy(priv);
up_read(&mm->mmap_sem);
mmput(mm);
}
@@ -132,7 +177,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
tail_vma = get_gate_vma(priv->task->mm);
priv->tail_vma = tail_vma;
-
+ hold_task_mempolicy(priv);
/* Start with last addr hint */
vma = find_vma(mm, last_addr);
if (last_addr && vma) {
@@ -159,6 +204,7 @@ out:
if (vma)
return vma;
+ release_task_mempolicy(priv);
/* End of vmas has been reached */
m->version = (tail_vma != NULL)? 0: -1UL;
up_read(&mm->mmap_sem);
@@ -1158,6 +1204,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
struct vm_area_struct *vma = v;
struct numa_maps *md = &numa_priv->md;
struct file *file = vma->vm_file;
+ struct task_struct *task = proc_priv->task;
struct mm_struct *mm = vma->vm_mm;
struct mm_walk walk = {};
struct mempolicy *pol;
@@ -1177,7 +1224,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
walk.private = md;
walk.mm = mm;
- pol = get_vma_policy(proc_priv->task, vma, vma->vm_start);
+ pol = get_vma_policy(task, vma, vma->vm_start);
mpol_to_str(buffer, sizeof(buffer), pol, 0);
mpol_cond_put(pol);
@@ -1189,7 +1236,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
seq_printf(m, " heap");
} else {
- pid_t tid = vm_is_stack(proc_priv->task, vma, is_pid);
+ pid_t tid = vm_is_stack(task, vma, is_pid);
if (tid != 0) {
/*
* Thread stack in /proc/PID/task/TID/maps or