diff options
Diffstat (limited to 'fs/proc')
-rw-r--r-- | fs/proc/array.c | 201 | ||||
-rw-r--r-- | fs/proc/base.c | 171 | ||||
-rw-r--r-- | fs/proc/fd.c | 14 | ||||
-rw-r--r-- | fs/proc/fd.h | 2 | ||||
-rw-r--r-- | fs/proc/generic.c | 5 | ||||
-rw-r--r-- | fs/proc/inode.c | 40 | ||||
-rw-r--r-- | fs/proc/internal.h | 8 | ||||
-rw-r--r-- | fs/proc/meminfo.c | 211 | ||||
-rw-r--r-- | fs/proc/namespaces.c | 3 | ||||
-rw-r--r-- | fs/proc/proc_net.c | 13 | ||||
-rw-r--r-- | fs/proc/proc_sysctl.c | 23 | ||||
-rw-r--r-- | fs/proc/root.c | 1 | ||||
-rw-r--r-- | fs/proc/self.c | 2 | ||||
-rw-r--r-- | fs/proc/stat.c | 49 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 40 | ||||
-rw-r--r-- | fs/proc/task_nommu.c | 28 | ||||
-rw-r--r-- | fs/proc/thread_self.c | 2 |
17 files changed, 422 insertions, 391 deletions
diff --git a/fs/proc/array.c b/fs/proc/array.c index 88c7de12197b..51a4213afa2e 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -186,51 +186,45 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, task_unlock(p); rcu_read_unlock(); - seq_printf(m, - "State:\t%s\n" - "Tgid:\t%d\n" - "Ngid:\t%d\n" - "Pid:\t%d\n" - "PPid:\t%d\n" - "TracerPid:\t%d\n" - "Uid:\t%d\t%d\t%d\t%d\n" - "Gid:\t%d\t%d\t%d\t%d\n" - "FDSize:\t%d\nGroups:\t", - get_task_state(p), - tgid, ngid, pid_nr_ns(pid, ns), ppid, tpid, - from_kuid_munged(user_ns, cred->uid), - from_kuid_munged(user_ns, cred->euid), - from_kuid_munged(user_ns, cred->suid), - from_kuid_munged(user_ns, cred->fsuid), - from_kgid_munged(user_ns, cred->gid), - from_kgid_munged(user_ns, cred->egid), - from_kgid_munged(user_ns, cred->sgid), - from_kgid_munged(user_ns, cred->fsgid), - max_fds); - + seq_printf(m, "State:\t%s", get_task_state(p)); + + seq_put_decimal_ull(m, "\nTgid:\t", tgid); + seq_put_decimal_ull(m, "\nNgid:\t", ngid); + seq_put_decimal_ull(m, "\nPid:\t", pid_nr_ns(pid, ns)); + seq_put_decimal_ull(m, "\nPPid:\t", ppid); + seq_put_decimal_ull(m, "\nTracerPid:\t", tpid); + seq_put_decimal_ull(m, "\nUid:\t", from_kuid_munged(user_ns, cred->uid)); + seq_put_decimal_ull(m, "\t", from_kuid_munged(user_ns, cred->euid)); + seq_put_decimal_ull(m, "\t", from_kuid_munged(user_ns, cred->suid)); + seq_put_decimal_ull(m, "\t", from_kuid_munged(user_ns, cred->fsuid)); + seq_put_decimal_ull(m, "\nGid:\t", from_kgid_munged(user_ns, cred->gid)); + seq_put_decimal_ull(m, "\t", from_kgid_munged(user_ns, cred->egid)); + seq_put_decimal_ull(m, "\t", from_kgid_munged(user_ns, cred->sgid)); + seq_put_decimal_ull(m, "\t", from_kgid_munged(user_ns, cred->fsgid)); + seq_put_decimal_ull(m, "\nFDSize:\t", max_fds); + + seq_puts(m, "\nGroups:\t"); group_info = cred->group_info; for (g = 0; g < group_info->ngroups; g++) - seq_printf(m, "%d ", - from_kgid_munged(user_ns, GROUP_AT(group_info, g))); + seq_put_decimal_ull(m, g ? " " : "", + from_kgid_munged(user_ns, group_info->gid[g])); put_cred(cred); + /* Trailing space shouldn't have been added in the first place. */ + seq_putc(m, ' '); #ifdef CONFIG_PID_NS seq_puts(m, "\nNStgid:"); for (g = ns->level; g <= pid->level; g++) - seq_printf(m, "\t%d", - task_tgid_nr_ns(p, pid->numbers[g].ns)); + seq_put_decimal_ull(m, "\t", task_tgid_nr_ns(p, pid->numbers[g].ns)); seq_puts(m, "\nNSpid:"); for (g = ns->level; g <= pid->level; g++) - seq_printf(m, "\t%d", - task_pid_nr_ns(p, pid->numbers[g].ns)); + seq_put_decimal_ull(m, "\t", task_pid_nr_ns(p, pid->numbers[g].ns)); seq_puts(m, "\nNSpgid:"); for (g = ns->level; g <= pid->level; g++) - seq_printf(m, "\t%d", - task_pgrp_nr_ns(p, pid->numbers[g].ns)); + seq_put_decimal_ull(m, "\t", task_pgrp_nr_ns(p, pid->numbers[g].ns)); seq_puts(m, "\nNSsid:"); for (g = ns->level; g <= pid->level; g++) - seq_printf(m, "\t%d", - task_session_nr_ns(p, pid->numbers[g].ns)); + seq_put_decimal_ull(m, "\t", task_session_nr_ns(p, pid->numbers[g].ns)); #endif seq_putc(m, '\n'); } @@ -251,7 +245,7 @@ void render_sigset_t(struct seq_file *m, const char *header, if (sigismember(set, i+2)) x |= 2; if (sigismember(set, i+3)) x |= 4; if (sigismember(set, i+4)) x |= 8; - seq_printf(m, "%x", x); + seq_putc(m, hex_asc[x]); } while (i >= 4); seq_putc(m, '\n'); @@ -299,11 +293,12 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p) unlock_task_sighand(p, &flags); } - seq_printf(m, "Threads:\t%d\n", num_threads); - seq_printf(m, "SigQ:\t%lu/%lu\n", qsize, qlim); + seq_put_decimal_ull(m, "Threads:\t", num_threads); + seq_put_decimal_ull(m, "\nSigQ:\t", qsize); + seq_put_decimal_ull(m, "/", qlim); /* render them all */ - render_sigset_t(m, "SigPnd:\t", &pending); + render_sigset_t(m, "\nSigPnd:\t", &pending); render_sigset_t(m, "ShdPnd:\t", &shpending); render_sigset_t(m, "SigBlk:\t", &blocked); render_sigset_t(m, "SigIgn:\t", &ignored); @@ -347,18 +342,19 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p) static inline void task_seccomp(struct seq_file *m, struct task_struct *p) { + seq_put_decimal_ull(m, "NoNewPrivs:\t", task_no_new_privs(p)); #ifdef CONFIG_SECCOMP - seq_printf(m, "Seccomp:\t%d\n", p->seccomp.mode); + seq_put_decimal_ull(m, "\nSeccomp:\t", p->seccomp.mode); #endif + seq_putc(m, '\n'); } static inline void task_context_switch_counts(struct seq_file *m, struct task_struct *p) { - seq_printf(m, "voluntary_ctxt_switches:\t%lu\n" - "nonvoluntary_ctxt_switches:\t%lu\n", - p->nvcsw, - p->nivcsw); + seq_put_decimal_ull(m, "voluntary_ctxt_switches:\t", p->nvcsw); + seq_put_decimal_ull(m, "\nnonvoluntary_ctxt_switches:\t", p->nivcsw); + seq_putc(m, '\n'); } static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) @@ -417,10 +413,11 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, mm = get_task_mm(task); if (mm) { vsize = task_vsize(mm); - if (permitted) { - eip = KSTK_EIP(task); - esp = KSTK_ESP(task); - } + /* + * esp and eip are intentionally zeroed out. There is no + * non-racy way to read them without freezing the task. + * Programs that need reliable values can use ptrace(2). + */ } get_task_comm(tcomm, task); @@ -490,41 +487,41 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, start_time = nsec_to_clock_t(task->real_start_time); seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state); - seq_put_decimal_ll(m, ' ', ppid); - seq_put_decimal_ll(m, ' ', pgid); - seq_put_decimal_ll(m, ' ', sid); - seq_put_decimal_ll(m, ' ', tty_nr); - seq_put_decimal_ll(m, ' ', tty_pgrp); - seq_put_decimal_ull(m, ' ', task->flags); - seq_put_decimal_ull(m, ' ', min_flt); - seq_put_decimal_ull(m, ' ', cmin_flt); - seq_put_decimal_ull(m, ' ', maj_flt); - seq_put_decimal_ull(m, ' ', cmaj_flt); - seq_put_decimal_ull(m, ' ', cputime_to_clock_t(utime)); - seq_put_decimal_ull(m, ' ', cputime_to_clock_t(stime)); - seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cutime)); - seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cstime)); - seq_put_decimal_ll(m, ' ', priority); - seq_put_decimal_ll(m, ' ', nice); - seq_put_decimal_ll(m, ' ', num_threads); - seq_put_decimal_ull(m, ' ', 0); - seq_put_decimal_ull(m, ' ', start_time); - seq_put_decimal_ull(m, ' ', vsize); - seq_put_decimal_ull(m, ' ', mm ? get_mm_rss(mm) : 0); - seq_put_decimal_ull(m, ' ', rsslim); - seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->start_code : 1) : 0); - seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->end_code : 1) : 0); - seq_put_decimal_ull(m, ' ', (permitted && mm) ? mm->start_stack : 0); - seq_put_decimal_ull(m, ' ', esp); - seq_put_decimal_ull(m, ' ', eip); + seq_put_decimal_ll(m, " ", ppid); + seq_put_decimal_ll(m, " ", pgid); + seq_put_decimal_ll(m, " ", sid); + seq_put_decimal_ll(m, " ", tty_nr); + seq_put_decimal_ll(m, " ", tty_pgrp); + seq_put_decimal_ull(m, " ", task->flags); + seq_put_decimal_ull(m, " ", min_flt); + seq_put_decimal_ull(m, " ", cmin_flt); + seq_put_decimal_ull(m, " ", maj_flt); + seq_put_decimal_ull(m, " ", cmaj_flt); + seq_put_decimal_ull(m, " ", cputime_to_clock_t(utime)); + seq_put_decimal_ull(m, " ", cputime_to_clock_t(stime)); + seq_put_decimal_ll(m, " ", cputime_to_clock_t(cutime)); + seq_put_decimal_ll(m, " ", cputime_to_clock_t(cstime)); + seq_put_decimal_ll(m, " ", priority); + seq_put_decimal_ll(m, " ", nice); + seq_put_decimal_ll(m, " ", num_threads); + seq_put_decimal_ull(m, " ", 0); + seq_put_decimal_ull(m, " ", start_time); + seq_put_decimal_ull(m, " ", vsize); + seq_put_decimal_ull(m, " ", mm ? get_mm_rss(mm) : 0); + seq_put_decimal_ull(m, " ", rsslim); + seq_put_decimal_ull(m, " ", mm ? (permitted ? mm->start_code : 1) : 0); + seq_put_decimal_ull(m, " ", mm ? (permitted ? mm->end_code : 1) : 0); + seq_put_decimal_ull(m, " ", (permitted && mm) ? mm->start_stack : 0); + seq_put_decimal_ull(m, " ", esp); + seq_put_decimal_ull(m, " ", eip); /* The signal information here is obsolete. * It must be decimal for Linux 2.0 compatibility. * Use /proc/#/status for real-time signals. */ - seq_put_decimal_ull(m, ' ', task->pending.signal.sig[0] & 0x7fffffffUL); - seq_put_decimal_ull(m, ' ', task->blocked.sig[0] & 0x7fffffffUL); - seq_put_decimal_ull(m, ' ', sigign.sig[0] & 0x7fffffffUL); - seq_put_decimal_ull(m, ' ', sigcatch.sig[0] & 0x7fffffffUL); + seq_put_decimal_ull(m, " ", task->pending.signal.sig[0] & 0x7fffffffUL); + seq_put_decimal_ull(m, " ", task->blocked.sig[0] & 0x7fffffffUL); + seq_put_decimal_ull(m, " ", sigign.sig[0] & 0x7fffffffUL); + seq_put_decimal_ull(m, " ", sigcatch.sig[0] & 0x7fffffffUL); /* * We used to output the absolute kernel address, but that's an @@ -538,31 +535,31 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, else seq_puts(m, " 0"); - seq_put_decimal_ull(m, ' ', 0); - seq_put_decimal_ull(m, ' ', 0); - seq_put_decimal_ll(m, ' ', task->exit_signal); - seq_put_decimal_ll(m, ' ', task_cpu(task)); - seq_put_decimal_ull(m, ' ', task->rt_priority); - seq_put_decimal_ull(m, ' ', task->policy); - seq_put_decimal_ull(m, ' ', delayacct_blkio_ticks(task)); - seq_put_decimal_ull(m, ' ', cputime_to_clock_t(gtime)); - seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cgtime)); + seq_put_decimal_ull(m, " ", 0); + seq_put_decimal_ull(m, " ", 0); + seq_put_decimal_ll(m, " ", task->exit_signal); + seq_put_decimal_ll(m, " ", task_cpu(task)); + seq_put_decimal_ull(m, " ", task->rt_priority); + seq_put_decimal_ull(m, " ", task->policy); + seq_put_decimal_ull(m, " ", delayacct_blkio_ticks(task)); + seq_put_decimal_ull(m, " ", cputime_to_clock_t(gtime)); + seq_put_decimal_ll(m, " ", cputime_to_clock_t(cgtime)); if (mm && permitted) { - seq_put_decimal_ull(m, ' ', mm->start_data); - seq_put_decimal_ull(m, ' ', mm->end_data); - seq_put_decimal_ull(m, ' ', mm->start_brk); - seq_put_decimal_ull(m, ' ', mm->arg_start); - seq_put_decimal_ull(m, ' ', mm->arg_end); - seq_put_decimal_ull(m, ' ', mm->env_start); - seq_put_decimal_ull(m, ' ', mm->env_end); + seq_put_decimal_ull(m, " ", mm->start_data); + seq_put_decimal_ull(m, " ", mm->end_data); + seq_put_decimal_ull(m, " ", mm->start_brk); + seq_put_decimal_ull(m, " ", mm->arg_start); + seq_put_decimal_ull(m, " ", mm->arg_end); + seq_put_decimal_ull(m, " ", mm->env_start); + seq_put_decimal_ull(m, " ", mm->env_end); } else - seq_printf(m, " 0 0 0 0 0 0 0"); + seq_puts(m, " 0 0 0 0 0 0 0"); if (permitted) - seq_put_decimal_ll(m, ' ', task->exit_code); + seq_put_decimal_ll(m, " ", task->exit_code); else - seq_put_decimal_ll(m, ' ', 0); + seq_puts(m, " 0"); seq_putc(m, '\n'); if (mm) @@ -598,13 +595,13 @@ int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns, * seq_printf(m, "%lu %lu %lu %lu 0 %lu 0\n", * size, resident, shared, text, data); */ - seq_put_decimal_ull(m, 0, size); - seq_put_decimal_ull(m, ' ', resident); - seq_put_decimal_ull(m, ' ', shared); - seq_put_decimal_ull(m, ' ', text); - seq_put_decimal_ull(m, ' ', 0); - seq_put_decimal_ull(m, ' ', data); - seq_put_decimal_ull(m, ' ', 0); + seq_put_decimal_ull(m, "", size); + seq_put_decimal_ull(m, " ", resident); + seq_put_decimal_ull(m, " ", shared); + seq_put_decimal_ull(m, " ", text); + seq_put_decimal_ull(m, " ", 0); + seq_put_decimal_ull(m, " ", data); + seq_put_decimal_ull(m, " ", 0); seq_putc(m, '\n'); return 0; diff --git a/fs/proc/base.c b/fs/proc/base.c index 9cdb3e40899a..5ea836362870 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -104,9 +104,12 @@ * in /proc for a task before it execs a suid executable. */ +static u8 nlink_tid; +static u8 nlink_tgid; + struct pid_entry { const char *name; - int len; + unsigned int len; umode_t mode; const struct inode_operations *iop; const struct file_operations *fop; @@ -139,13 +142,13 @@ struct pid_entry { * Count the number of hardlinks for the pid_entry table, excluding the . * and .. links. */ -static unsigned int pid_entry_count_dirs(const struct pid_entry *entries, +static unsigned int __init pid_entry_nlink(const struct pid_entry *entries, unsigned int n) { unsigned int i; unsigned int count; - count = 0; + count = 2; for (i = 0; i < n; ++i) { if (S_ISDIR(entries[i].mode)) ++count; @@ -400,23 +403,6 @@ static const struct file_operations proc_pid_cmdline_ops = { .llseek = generic_file_llseek, }; -static int proc_pid_auxv(struct seq_file *m, struct pid_namespace *ns, - struct pid *pid, struct task_struct *task) -{ - struct mm_struct *mm = mm_access(task, PTRACE_MODE_READ_FSCREDS); - if (mm && !IS_ERR(mm)) { - unsigned int nwords = 0; - do { - nwords += 2; - } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ - seq_write(m, mm->saved_auxv, nwords * sizeof(mm->saved_auxv[0])); - mmput(mm); - return 0; - } else - return PTR_ERR(mm); -} - - #ifdef CONFIG_KALLSYMS /* * Provides a wchan file via kallsyms in a proper one-value-per-file format. @@ -483,7 +469,7 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, save_stack_trace_tsk(task, &trace); for (i = 0; i < trace.nr_entries; i++) { - seq_printf(m, "[<%pK>] %pS\n", + seq_printf(m, "[<%pK>] %pB\n", (void *)entries[i], (void *)entries[i]); } unlock_trace(task); @@ -709,7 +695,7 @@ int proc_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_valid & ATTR_MODE) return -EPERM; - error = inode_change_ok(inode, attr); + error = setattr_prepare(dentry, attr); if (error) return error; @@ -849,6 +835,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf, unsigned long addr = *ppos; ssize_t copied; char *page; + unsigned int flags; if (!mm) return 0; @@ -861,6 +848,11 @@ static ssize_t mem_rw(struct file *file, char __user *buf, if (!atomic_inc_not_zero(&mm->mm_users)) goto free; + /* Maybe we should limit FOLL_FORCE to actual ptrace users? */ + flags = FOLL_FORCE; + if (write) + flags |= FOLL_WRITE; + while (count > 0) { int this_len = min_t(int, count, PAGE_SIZE); @@ -869,7 +861,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf, break; } - this_len = access_remote_vm(mm, addr, page, this_len, write); + this_len = access_remote_vm(mm, addr, page, this_len, flags); if (!this_len) { if (!copied) copied = -EIO; @@ -981,8 +973,7 @@ static ssize_t environ_read(struct file *file, char __user *buf, max_len = min_t(size_t, PAGE_SIZE, count); this_len = min(max_len, this_len); - retval = access_remote_vm(mm, (env_start + src), - page, this_len, 0); + retval = access_remote_vm(mm, (env_start + src), page, this_len, 0); if (retval <= 0) { ret = retval; @@ -1014,6 +1005,33 @@ static const struct file_operations proc_environ_operations = { .release = mem_release, }; +static int auxv_open(struct inode *inode, struct file *file) +{ + return __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS); +} + +static ssize_t auxv_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct mm_struct *mm = file->private_data; + unsigned int nwords = 0; + + if (!mm) + return 0; + do { + nwords += 2; + } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ + return simple_read_from_buffer(buf, count, ppos, mm->saved_auxv, + nwords * sizeof(mm->saved_auxv[0])); +} + +static const struct file_operations proc_auxv_operations = { + .open = auxv_open, + .read = auxv_read, + .llseek = generic_file_llseek, + .release = mem_release, +}; + static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -1649,7 +1667,8 @@ const struct inode_operations proc_pid_link_inode_operations = { /* building an inode */ -struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task) +struct inode *proc_pid_make_inode(struct super_block * sb, + struct task_struct *task, umode_t mode) { struct inode * inode; struct proc_inode *ei; @@ -1663,8 +1682,9 @@ struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *t /* Common stuff */ ei = PROC_I(inode); + inode->i_mode = mode; inode->i_ino = get_next_ino(); - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); inode->i_op = &proc_def_inode_operations; /* @@ -1952,7 +1972,7 @@ out: struct map_files_info { fmode_t mode; - unsigned long len; + unsigned int len; unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ }; @@ -1989,7 +2009,9 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, struct proc_inode *ei; struct inode *inode; - inode = proc_pid_make_inode(dir->i_sb, task); + inode = proc_pid_make_inode(dir->i_sb, task, S_IFLNK | + ((mode & FMODE_READ ) ? S_IRUSR : 0) | + ((mode & FMODE_WRITE) ? S_IWUSR : 0)); if (!inode) return -ENOENT; @@ -1998,12 +2020,6 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, inode->i_op = &proc_map_files_link_inode_operations; inode->i_size = 64; - inode->i_mode = S_IFLNK; - - if (mode & FMODE_READ) - inode->i_mode |= S_IRUSR; - if (mode & FMODE_WRITE) - inode->i_mode |= S_IWUSR; d_set_d_op(dentry, &tid_map_files_dentry_operations); d_add(dentry, inode); @@ -2280,16 +2296,27 @@ static ssize_t timerslack_ns_write(struct file *file, const char __user *buf, if (!p) return -ESRCH; - if (ptrace_may_access(p, PTRACE_MODE_ATTACH_FSCREDS)) { - task_lock(p); - if (slack_ns == 0) - p->timer_slack_ns = p->default_timer_slack_ns; - else - p->timer_slack_ns = slack_ns; - task_unlock(p); - } else - count = -EPERM; + if (p != current) { + if (!capable(CAP_SYS_NICE)) { + count = -EPERM; + goto out; + } + err = security_task_setscheduler(p); + if (err) { + count = err; + goto out; + } + } + + task_lock(p); + if (slack_ns == 0) + p->timer_slack_ns = p->default_timer_slack_ns; + else + p->timer_slack_ns = slack_ns; + task_unlock(p); + +out: put_task_struct(p); return count; @@ -2299,19 +2326,28 @@ static int timerslack_ns_show(struct seq_file *m, void *v) { struct inode *inode = m->private; struct task_struct *p; - int err = 0; + int err = 0; p = get_proc_task(inode); if (!p) return -ESRCH; - if (ptrace_may_access(p, PTRACE_MODE_ATTACH_FSCREDS)) { - task_lock(p); - seq_printf(m, "%llu\n", p->timer_slack_ns); - task_unlock(p); - } else - err = -EPERM; + if (p != current) { + + if (!capable(CAP_SYS_NICE)) { + err = -EPERM; + goto out; + } + err = security_task_getscheduler(p); + if (err) + goto out; + } + + task_lock(p); + seq_printf(m, "%llu\n", p->timer_slack_ns); + task_unlock(p); +out: put_task_struct(p); return err; @@ -2337,12 +2373,11 @@ static int proc_pident_instantiate(struct inode *dir, struct inode *inode; struct proc_inode *ei; - inode = proc_pid_make_inode(dir->i_sb, task); + inode = proc_pid_make_inode(dir->i_sb, task, p->mode); if (!inode) goto out; ei = PROC_I(inode); - inode->i_mode = p->mode; if (S_ISDIR(inode->i_mode)) set_nlink(inode, 2); /* Use getattr to fix if necessary */ if (p->iop) @@ -2377,14 +2412,14 @@ static struct dentry *proc_pident_lookup(struct inode *dir, * Yes, it does not scale. And it should not. Don't add * new entries into /proc/<tgid>/ without very good reasons. */ - last = &ents[nents - 1]; - for (p = ents; p <= last; p++) { + last = &ents[nents]; + for (p = ents; p < last; p++) { if (p->len != dentry->d_name.len) continue; if (!memcmp(dentry->d_name.name, p->name, p->len)) break; } - if (p > last) + if (p >= last) goto out; error = proc_pident_instantiate(dir, dentry, task, p); @@ -2409,7 +2444,7 @@ static int proc_pident_readdir(struct file *file, struct dir_context *ctx, if (ctx->pos >= nents + 2) goto out; - for (p = ents + (ctx->pos - 2); p <= ents + nents - 1; p++) { + for (p = ents + (ctx->pos - 2); p < ents + nents; p++) { if (!proc_fill_cache(file, ctx, p->name, p->len, proc_pident_instantiate, task, p)) break; @@ -2822,7 +2857,7 @@ static const struct pid_entry tgid_base_stuff[] = { DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), #endif REG("environ", S_IRUSR, proc_environ_operations), - ONE("auxv", S_IRUSR, proc_pid_auxv), + REG("auxv", S_IRUSR, proc_auxv_operations), ONE("status", S_IRUGO, proc_pid_status), ONE("personality", S_IRUSR, proc_pid_personality), ONE("limits", S_IRUGO, proc_pid_limits), @@ -3024,17 +3059,15 @@ static int proc_pid_instantiate(struct inode *dir, { struct inode *inode; - inode = proc_pid_make_inode(dir->i_sb, task); + inode = proc_pid_make_inode(dir->i_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); if (!inode) goto out; - inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; inode->i_op = &proc_tgid_base_inode_operations; inode->i_fop = &proc_tgid_base_operations; inode->i_flags|=S_IMMUTABLE; - set_nlink(inode, 2 + pid_entry_count_dirs(tgid_base_stuff, - ARRAY_SIZE(tgid_base_stuff))); + set_nlink(inode, nlink_tgid); d_set_d_op(dentry, &pid_dentry_operations); @@ -3210,7 +3243,7 @@ static const struct pid_entry tid_base_stuff[] = { DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), #endif REG("environ", S_IRUSR, proc_environ_operations), - ONE("auxv", S_IRUSR, proc_pid_auxv), + REG("auxv", S_IRUSR, proc_auxv_operations), ONE("status", S_IRUGO, proc_pid_status), ONE("personality", S_IRUSR, proc_pid_personality), ONE("limits", S_IRUGO, proc_pid_limits), @@ -3317,17 +3350,15 @@ static int proc_task_instantiate(struct inode *dir, struct dentry *dentry, struct task_struct *task, const void *ptr) { struct inode *inode; - inode = proc_pid_make_inode(dir->i_sb, task); + inode = proc_pid_make_inode(dir->i_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); if (!inode) goto out; - inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; inode->i_op = &proc_tid_base_inode_operations; inode->i_fop = &proc_tid_base_operations; inode->i_flags|=S_IMMUTABLE; - set_nlink(inode, 2 + pid_entry_count_dirs(tid_base_stuff, - ARRAY_SIZE(tid_base_stuff))); + set_nlink(inode, nlink_tid); d_set_d_op(dentry, &pid_dentry_operations); @@ -3517,3 +3548,9 @@ static const struct file_operations proc_task_operations = { .iterate_shared = proc_task_readdir, .llseek = generic_file_llseek, }; + +void __init set_proc_pid_nlink(void) +{ + nlink_tid = pid_entry_nlink(tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); + nlink_tgid = pid_entry_nlink(tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); +} diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 01df23cc81f6..4274f83bf100 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -31,7 +31,7 @@ static int seq_show(struct seq_file *m, void *v) put_task_struct(task); if (files) { - int fd = proc_fd(m->private); + unsigned int fd = proc_fd(m->private); spin_lock(&files->file_lock); file = fcheck_files(files, fd); @@ -86,7 +86,7 @@ static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags) struct task_struct *task; const struct cred *cred; struct inode *inode; - int fd; + unsigned int fd; if (flags & LOOKUP_RCU) return -ECHILD; @@ -158,7 +158,7 @@ static int proc_fd_link(struct dentry *dentry, struct path *path) } if (files) { - int fd = proc_fd(d_inode(dentry)); + unsigned int fd = proc_fd(d_inode(dentry)); struct file *fd_file; spin_lock(&files->file_lock); @@ -183,14 +183,13 @@ proc_fd_instantiate(struct inode *dir, struct dentry *dentry, struct proc_inode *ei; struct inode *inode; - inode = proc_pid_make_inode(dir->i_sb, task); + inode = proc_pid_make_inode(dir->i_sb, task, S_IFLNK); if (!inode) goto out; ei = PROC_I(inode); ei->fd = fd; - inode->i_mode = S_IFLNK; inode->i_op = &proc_pid_link_inode_operations; inode->i_size = 64; @@ -253,7 +252,7 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx, continue; rcu_read_unlock(); - len = snprintf(name, sizeof(name), "%d", fd); + len = snprintf(name, sizeof(name), "%u", fd); if (!proc_fill_cache(file, ctx, name, len, instantiate, p, (void *)(unsigned long)fd)) @@ -322,14 +321,13 @@ proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry, struct proc_inode *ei; struct inode *inode; - inode = proc_pid_make_inode(dir->i_sb, task); + inode = proc_pid_make_inode(dir->i_sb, task, S_IFREG | S_IRUSR); if (!inode) goto out; ei = PROC_I(inode); ei->fd = fd; - inode->i_mode = S_IFREG | S_IRUSR; inode->i_fop = &proc_fdinfo_file_operations; d_set_d_op(dentry, &tid_fd_dentry_operations); diff --git a/fs/proc/fd.h b/fs/proc/fd.h index 7c047f256ae2..46dafadd0083 100644 --- a/fs/proc/fd.h +++ b/fs/proc/fd.h @@ -11,7 +11,7 @@ extern const struct inode_operations proc_fdinfo_inode_operations; extern int proc_fd_permission(struct inode *inode, int mask); -static inline int proc_fd(struct inode *inode) +static inline unsigned int proc_fd(struct inode *inode) { return PROC_I(inode)->fd; } diff --git a/fs/proc/generic.c b/fs/proc/generic.c index c633476616e0..7eb3cefcf2a3 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -105,7 +105,7 @@ static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) struct proc_dir_entry *de = PDE(inode); int error; - error = inode_change_ok(inode, iattr); + error = setattr_prepare(dentry, iattr); if (error) return error; @@ -390,6 +390,8 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, atomic_set(&ent->count, 1); spin_lock_init(&ent->pde_unload_lock); INIT_LIST_HEAD(&ent->pde_openers); + proc_set_user(ent, (*parent)->uid, (*parent)->gid); + out: return ent; } @@ -477,6 +479,7 @@ struct proc_dir_entry *proc_create_mount_point(const char *name) } return ent; } +EXPORT_SYMBOL(proc_create_mount_point); struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, struct proc_dir_entry *parent, diff --git a/fs/proc/inode.c b/fs/proc/inode.c index c1b72388e571..783bc19644d1 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -68,7 +68,6 @@ static struct inode *proc_alloc_inode(struct super_block *sb) ei->sysctl_entry = NULL; ei->ns_ops = NULL; inode = &ei->vfs_inode; - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; return inode; } @@ -139,6 +138,16 @@ static void unuse_pde(struct proc_dir_entry *pde) /* pde is locked */ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) { + /* + * close() (proc_reg_release()) can't delete an entry and proceed: + * ->release hook needs to be available at the right moment. + * + * rmmod (remove_proc_entry() et al) can't delete an entry and proceed: + * "struct file" needs to be available at the right moment. + * + * Therefore, first process to enter this function does ->release() and + * signals its completion to the other process which does nothing. + */ if (pdeo->closing) { /* somebody else is doing that, just wait */ DECLARE_COMPLETION_ONSTACK(c); @@ -148,12 +157,13 @@ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) spin_lock(&pde->pde_unload_lock); } else { struct file *file; - pdeo->closing = 1; + pdeo->closing = true; spin_unlock(&pde->pde_unload_lock); file = pdeo->file; pde->proc_fops->release(file_inode(file), file); spin_lock(&pde->pde_unload_lock); - list_del_init(&pdeo->lh); + /* After ->release. */ + list_del(&pdeo->lh); if (pdeo->c) complete(pdeo->c); kfree(pdeo); @@ -168,6 +178,8 @@ void proc_entry_rundown(struct proc_dir_entry *de) if (atomic_add_return(BIAS, &de->in_use) != BIAS) wait_for_completion(&c); + /* ->pde_openers list can't grow from now on. */ + spin_lock(&de->pde_unload_lock); while (!list_empty(&de->pde_openers)) { struct pde_opener *pdeo; @@ -313,16 +325,17 @@ static int proc_reg_open(struct inode *inode, struct file *file) struct pde_opener *pdeo; /* - * What for, you ask? Well, we can have open, rmmod, remove_proc_entry - * sequence. ->release won't be called because ->proc_fops will be - * cleared. Depending on complexity of ->release, consequences vary. + * Ensure that + * 1) PDE's ->release hook will be called no matter what + * either normally by close()/->release, or forcefully by + * rmmod/remove_proc_entry. + * + * 2) rmmod isn't blocked by opening file in /proc and sitting on + * the descriptor (including "rmmod foo </proc/foo" scenario). * - * We can't wait for mercy when close will be done for real, it's - * deadlockable: rmmod foo </proc/foo . So, we're going to do ->release - * by hand in remove_proc_entry(). For this, save opener's credentials - * for later. + * Save every "struct file" with custom ->release hook. */ - pdeo = kzalloc(sizeof(struct pde_opener), GFP_KERNEL); + pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL); if (!pdeo) return -ENOMEM; @@ -339,7 +352,8 @@ static int proc_reg_open(struct inode *inode, struct file *file) if (rv == 0 && release) { /* To know what to release. */ pdeo->file = file; - /* Strictly for "too late" ->release in proc_reg_release(). */ + pdeo->closing = false; + pdeo->c = NULL; spin_lock(&pde->pde_unload_lock); list_add(&pdeo->lh, &pde->pde_openers); spin_unlock(&pde->pde_unload_lock); @@ -421,7 +435,7 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) if (inode) { inode->i_ino = de->low_ino; - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); PROC_I(inode)->pde = de; if (is_empty_pde(de)) { diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 7931c558c192..2de5194ba378 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -60,7 +60,7 @@ union proc_op { struct proc_inode { struct pid *pid; - int fd; + unsigned int fd; union proc_op op; struct proc_dir_entry *pde; struct ctl_table_header *sysctl; @@ -162,7 +162,7 @@ extern int proc_pid_statm(struct seq_file *, struct pid_namespace *, extern const struct dentry_operations pid_dentry_operations; extern int pid_getattr(struct vfsmount *, struct dentry *, struct kstat *); extern int proc_setattr(struct dentry *, struct iattr *); -extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *); +extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *, umode_t); extern int pid_revalidate(struct dentry *, unsigned int); extern int pid_delete_dentry(const struct dentry *); extern int proc_pid_readdir(struct file *, struct dir_context *); @@ -195,7 +195,6 @@ static inline bool is_empty_pde(const struct proc_dir_entry *pde) { return S_ISDIR(pde->mode) && !pde->proc_iops; } -struct proc_dir_entry *proc_create_mount_point(const char *name); /* * inode.c @@ -203,7 +202,7 @@ struct proc_dir_entry *proc_create_mount_point(const char *name); struct pde_opener { struct file *file; struct list_head lh; - int closing; + bool closing; struct completion *c; }; extern const struct inode_operations proc_link_inode_operations; @@ -211,6 +210,7 @@ extern const struct inode_operations proc_link_inode_operations; extern const struct inode_operations proc_pid_link_inode_operations; extern void proc_init_inodecache(void); +void set_proc_pid_nlink(void); extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); extern int proc_fill_super(struct super_block *, void *data, int flags); extern void proc_entry_rundown(struct proc_dir_entry *); diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index b9a8c813e5e6..8a428498d6b2 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -23,6 +23,25 @@ void __attribute__((weak)) arch_report_meminfo(struct seq_file *m) { } +static void show_val_kb(struct seq_file *m, const char *s, unsigned long num) +{ + char v[32]; + static const char blanks[7] = {' ', ' ', ' ', ' ',' ', ' ', ' '}; + int len; + + len = num_to_str(v, sizeof(v), num << (PAGE_SHIFT - 10)); + + seq_write(m, s, 16); + + if (len > 0) { + if (len < 8) + seq_write(m, blanks, 8 - len); + + seq_write(m, v, len); + } + seq_write(m, " kB\n", 4); +} + static int meminfo_proc_show(struct seq_file *m, void *v) { struct sysinfo i; @@ -32,10 +51,6 @@ static int meminfo_proc_show(struct seq_file *m, void *v) unsigned long pages[NR_LRU_LISTS]; int lru; -/* - * display in kilobytes. - */ -#define K(x) ((x) << (PAGE_SHIFT - 10)) si_meminfo(&i); si_swapinfo(&i); committed = percpu_counter_read_positive(&vm_committed_as); @@ -50,136 +65,100 @@ static int meminfo_proc_show(struct seq_file *m, void *v) available = si_mem_available(); - /* - * Tagged format, for easy grepping and expansion. - */ - seq_printf(m, - "MemTotal: %8lu kB\n" - "MemFree: %8lu kB\n" - "MemAvailable: %8lu kB\n" - "Buffers: %8lu kB\n" - "Cached: %8lu kB\n" - "SwapCached: %8lu kB\n" - "Active: %8lu kB\n" - "Inactive: %8lu kB\n" - "Active(anon): %8lu kB\n" - "Inactive(anon): %8lu kB\n" - "Active(file): %8lu kB\n" - "Inactive(file): %8lu kB\n" - "Unevictable: %8lu kB\n" - "Mlocked: %8lu kB\n" -#ifdef CONFIG_HIGHMEM - "HighTotal: %8lu kB\n" - "HighFree: %8lu kB\n" - "LowTotal: %8lu kB\n" - "LowFree: %8lu kB\n" -#endif -#ifndef CONFIG_MMU - "MmapCopy: %8lu kB\n" -#endif - "SwapTotal: %8lu kB\n" - "SwapFree: %8lu kB\n" - "Dirty: %8lu kB\n" - "Writeback: %8lu kB\n" - "AnonPages: %8lu kB\n" - "Mapped: %8lu kB\n" - "Shmem: %8lu kB\n" - "Slab: %8lu kB\n" - "SReclaimable: %8lu kB\n" - "SUnreclaim: %8lu kB\n" - "KernelStack: %8lu kB\n" - "PageTables: %8lu kB\n" -#ifdef CONFIG_QUICKLIST - "Quicklists: %8lu kB\n" -#endif - "NFS_Unstable: %8lu kB\n" - "Bounce: %8lu kB\n" - "WritebackTmp: %8lu kB\n" - "CommitLimit: %8lu kB\n" - "Committed_AS: %8lu kB\n" - "VmallocTotal: %8lu kB\n" - "VmallocUsed: %8lu kB\n" - "VmallocChunk: %8lu kB\n" -#ifdef CONFIG_MEMORY_FAILURE - "HardwareCorrupted: %5lu kB\n" -#endif -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - "AnonHugePages: %8lu kB\n" - "ShmemHugePages: %8lu kB\n" - "ShmemPmdMapped: %8lu kB\n" -#endif -#ifdef CONFIG_CMA - "CmaTotal: %8lu kB\n" - "CmaFree: %8lu kB\n" -#endif - , - K(i.totalram), - K(i.freeram), - K(available), - K(i.bufferram), - K(cached), - K(total_swapcache_pages()), - K(pages[LRU_ACTIVE_ANON] + pages[LRU_ACTIVE_FILE]), - K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]), - K(pages[LRU_ACTIVE_ANON]), - K(pages[LRU_INACTIVE_ANON]), - K(pages[LRU_ACTIVE_FILE]), - K(pages[LRU_INACTIVE_FILE]), - K(pages[LRU_UNEVICTABLE]), - K(global_page_state(NR_MLOCK)), + show_val_kb(m, "MemTotal: ", i.totalram); + show_val_kb(m, "MemFree: ", i.freeram); + show_val_kb(m, "MemAvailable: ", available); + show_val_kb(m, "Buffers: ", i.bufferram); + show_val_kb(m, "Cached: ", cached); + show_val_kb(m, "SwapCached: ", total_swapcache_pages()); + show_val_kb(m, "Active: ", pages[LRU_ACTIVE_ANON] + + pages[LRU_ACTIVE_FILE]); + show_val_kb(m, "Inactive: ", pages[LRU_INACTIVE_ANON] + + pages[LRU_INACTIVE_FILE]); + show_val_kb(m, "Active(anon): ", pages[LRU_ACTIVE_ANON]); + show_val_kb(m, "Inactive(anon): ", pages[LRU_INACTIVE_ANON]); + show_val_kb(m, "Active(file): ", pages[LRU_ACTIVE_FILE]); + show_val_kb(m, "Inactive(file): ", pages[LRU_INACTIVE_FILE]); + show_val_kb(m, "Unevictable: ", pages[LRU_UNEVICTABLE]); + show_val_kb(m, "Mlocked: ", global_page_state(NR_MLOCK)); + #ifdef CONFIG_HIGHMEM - K(i.totalhigh), - K(i.freehigh), - K(i.totalram-i.totalhigh), - K(i.freeram-i.freehigh), + show_val_kb(m, "HighTotal: ", i.totalhigh); + show_val_kb(m, "HighFree: ", i.freehigh); + show_val_kb(m, "LowTotal: ", i.totalram - i.totalhigh); + show_val_kb(m, "LowFree: ", i.freeram - i.freehigh); #endif + #ifndef CONFIG_MMU - K((unsigned long) atomic_long_read(&mmap_pages_allocated)), + show_val_kb(m, "MmapCopy: ", + (unsigned long)atomic_long_read(&mmap_pages_allocated)); #endif - K(i.totalswap), - K(i.freeswap), - K(global_node_page_state(NR_FILE_DIRTY)), - K(global_node_page_state(NR_WRITEBACK)), - K(global_node_page_state(NR_ANON_MAPPED)), - K(global_node_page_state(NR_FILE_MAPPED)), - K(i.sharedram), - K(global_page_state(NR_SLAB_RECLAIMABLE) + - global_page_state(NR_SLAB_UNRECLAIMABLE)), - K(global_page_state(NR_SLAB_RECLAIMABLE)), - K(global_page_state(NR_SLAB_UNRECLAIMABLE)), - global_page_state(NR_KERNEL_STACK_KB), - K(global_page_state(NR_PAGETABLE)), + + show_val_kb(m, "SwapTotal: ", i.totalswap); + show_val_kb(m, "SwapFree: ", i.freeswap); + show_val_kb(m, "Dirty: ", + global_node_page_state(NR_FILE_DIRTY)); + show_val_kb(m, "Writeback: ", + global_node_page_state(NR_WRITEBACK)); + show_val_kb(m, "AnonPages: ", + global_node_page_state(NR_ANON_MAPPED)); + show_val_kb(m, "Mapped: ", + global_node_page_state(NR_FILE_MAPPED)); + show_val_kb(m, "Shmem: ", i.sharedram); + show_val_kb(m, "Slab: ", + global_page_state(NR_SLAB_RECLAIMABLE) + + global_page_state(NR_SLAB_UNRECLAIMABLE)); + + show_val_kb(m, "SReclaimable: ", + global_page_state(NR_SLAB_RECLAIMABLE)); + show_val_kb(m, "SUnreclaim: ", + global_page_state(NR_SLAB_UNRECLAIMABLE)); + seq_printf(m, "KernelStack: %8lu kB\n", + global_page_state(NR_KERNEL_STACK_KB)); + show_val_kb(m, "PageTables: ", + global_page_state(NR_PAGETABLE)); #ifdef CONFIG_QUICKLIST - K(quicklist_total_size()), + show_val_kb(m, "Quicklists: ", quicklist_total_size()); #endif - K(global_node_page_state(NR_UNSTABLE_NFS)), - K(global_page_state(NR_BOUNCE)), - K(global_node_page_state(NR_WRITEBACK_TEMP)), - K(vm_commit_limit()), - K(committed), - (unsigned long)VMALLOC_TOTAL >> 10, - 0ul, // used to be vmalloc 'used' - 0ul // used to be vmalloc 'largest_chunk' + + show_val_kb(m, "NFS_Unstable: ", + global_node_page_state(NR_UNSTABLE_NFS)); + show_val_kb(m, "Bounce: ", + global_page_state(NR_BOUNCE)); + show_val_kb(m, "WritebackTmp: ", + global_node_page_state(NR_WRITEBACK_TEMP)); + show_val_kb(m, "CommitLimit: ", vm_commit_limit()); + show_val_kb(m, "Committed_AS: ", committed); + seq_printf(m, "VmallocTotal: %8lu kB\n", + (unsigned long)VMALLOC_TOTAL >> 10); + show_val_kb(m, "VmallocUsed: ", 0ul); + show_val_kb(m, "VmallocChunk: ", 0ul); + #ifdef CONFIG_MEMORY_FAILURE - , atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10) + seq_printf(m, "HardwareCorrupted: %5lu kB\n", + atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10)); #endif + #ifdef CONFIG_TRANSPARENT_HUGEPAGE - , K(global_node_page_state(NR_ANON_THPS) * HPAGE_PMD_NR) - , K(global_node_page_state(NR_SHMEM_THPS) * HPAGE_PMD_NR) - , K(global_node_page_state(NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR) + show_val_kb(m, "AnonHugePages: ", + global_node_page_state(NR_ANON_THPS) * HPAGE_PMD_NR); + show_val_kb(m, "ShmemHugePages: ", + global_node_page_state(NR_SHMEM_THPS) * HPAGE_PMD_NR); + show_val_kb(m, "ShmemPmdMapped: ", + global_node_page_state(NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR); #endif + #ifdef CONFIG_CMA - , K(totalcma_pages) - , K(global_page_state(NR_FREE_CMA_PAGES)) + show_val_kb(m, "CmaTotal: ", totalcma_pages); + show_val_kb(m, "CmaFree: ", + global_page_state(NR_FREE_CMA_PAGES)); #endif - ); hugetlb_report_meminfo(m); arch_report_meminfo(m); return 0; -#undef K } static int meminfo_proc_open(struct inode *inode, struct file *file) diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 51b8b0a8ad91..766f0c637ad1 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -92,12 +92,11 @@ static int proc_ns_instantiate(struct inode *dir, struct inode *inode; struct proc_inode *ei; - inode = proc_pid_make_inode(dir->i_sb, task); + inode = proc_pid_make_inode(dir->i_sb, task, S_IFLNK | S_IRWXUGO); if (!inode) goto out; ei = PROC_I(inode); - inode->i_mode = S_IFLNK|S_IRWXUGO; inode->i_op = &proc_ns_link_inode_operations; ei->ns_ops = ns_ops; diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index c8bbc68cdb05..7ae6b1da7cab 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -21,6 +21,7 @@ #include <linux/bitops.h> #include <linux/mount.h> #include <linux/nsproxy.h> +#include <linux/uidgid.h> #include <net/net_namespace.h> #include <linux/seq_file.h> @@ -185,6 +186,8 @@ const struct file_operations proc_net_operations = { static __net_init int proc_net_ns_init(struct net *net) { struct proc_dir_entry *netd, *net_statd; + kuid_t uid; + kgid_t gid; int err; err = -ENOMEM; @@ -199,6 +202,16 @@ static __net_init int proc_net_ns_init(struct net *net) netd->parent = &proc_root; memcpy(netd->name, "net", 4); + uid = make_kuid(net->user_ns, 0); + if (!uid_valid(uid)) + uid = netd->uid; + + gid = make_kgid(net->user_ns, 0); + if (!gid_valid(gid)) + gid = netd->gid; + + proc_set_user(netd, uid, gid); + err = -EEXIST; net_statd = proc_net_mkdir(net, "stat", netd); if (!net_statd) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 1b93650dda2f..55313d994895 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -72,7 +72,7 @@ static DEFINE_SPINLOCK(sysctl_lock); static void drop_sysctl_table(struct ctl_table_header *header); static int sysctl_follow_link(struct ctl_table_header **phead, - struct ctl_table **pentry, struct nsproxy *namespaces); + struct ctl_table **pentry); static int insert_links(struct ctl_table_header *head); static void put_links(struct ctl_table_header *header); @@ -319,11 +319,11 @@ static void sysctl_head_finish(struct ctl_table_header *head) } static struct ctl_table_set * -lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces) +lookup_header_set(struct ctl_table_root *root) { struct ctl_table_set *set = &root->default_set; if (root->lookup) - set = root->lookup(root, namespaces); + set = root->lookup(root); return set; } @@ -430,6 +430,7 @@ static int sysctl_perm(struct ctl_table_header *head, struct ctl_table *table, i static struct inode *proc_sys_make_inode(struct super_block *sb, struct ctl_table_header *head, struct ctl_table *table) { + struct ctl_table_root *root = head->root; struct inode *inode; struct proc_inode *ei; @@ -444,7 +445,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, ei->sysctl = head; ei->sysctl_entry = table; - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); inode->i_mode = table->mode; if (!S_ISDIR(table->mode)) { inode->i_mode |= S_IFREG; @@ -457,6 +458,10 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, if (is_empty_dir(head)) make_empty_dir_inode(inode); } + + if (root->set_ownership) + root->set_ownership(head, table, &inode->i_uid, &inode->i_gid); + out: return inode; } @@ -491,7 +496,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, goto out; if (S_ISLNK(p->mode)) { - ret = sysctl_follow_link(&h, &p, current->nsproxy); + ret = sysctl_follow_link(&h, &p); err = ERR_PTR(ret); if (ret) goto out; @@ -659,7 +664,7 @@ static bool proc_sys_link_fill_cache(struct file *file, if (S_ISLNK(table->mode)) { /* It is not an error if we can not follow the link ignore it */ - int err = sysctl_follow_link(&head, &table, current->nsproxy); + int err = sysctl_follow_link(&head, &table); if (err) goto out; } @@ -754,7 +759,7 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)) return -EPERM; - error = inode_change_ok(inode, attr); + error = setattr_prepare(dentry, attr); if (error) return error; @@ -976,7 +981,7 @@ static struct ctl_dir *xlate_dir(struct ctl_table_set *set, struct ctl_dir *dir) } static int sysctl_follow_link(struct ctl_table_header **phead, - struct ctl_table **pentry, struct nsproxy *namespaces) + struct ctl_table **pentry) { struct ctl_table_header *head; struct ctl_table_root *root; @@ -988,7 +993,7 @@ static int sysctl_follow_link(struct ctl_table_header **phead, ret = 0; spin_lock(&sysctl_lock); root = (*pentry)->data; - set = lookup_header_set(root, namespaces); + set = lookup_header_set(root); dir = xlate_dir(set, (*phead)->parent); if (IS_ERR(dir)) ret = PTR_ERR(dir); diff --git a/fs/proc/root.c b/fs/proc/root.c index 8d3e484055a6..4bd0373576b5 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -122,6 +122,7 @@ void __init proc_root_init(void) int err; proc_init_inodecache(); + set_proc_pid_nlink(); err = register_filesystem(&proc_fs_type); if (err) return; diff --git a/fs/proc/self.c b/fs/proc/self.c index b6a8d3529fea..40245954c450 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c @@ -56,7 +56,7 @@ int proc_setup_self(struct super_block *s) struct inode *inode = new_inode_pseudo(s); if (inode) { inode->i_ino = self_inum; - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); inode->i_mode = S_IFLNK | S_IRWXUGO; inode->i_uid = GLOBAL_ROOT_UID; inode->i_gid = GLOBAL_ROOT_GID; diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 7907e456ac4f..d700c42b3572 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -115,17 +115,16 @@ static int show_stat(struct seq_file *p, void *v) } sum += arch_irq_stat(); - seq_puts(p, "cpu "); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(user)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(nice)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(system)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(idle)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(iowait)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(irq)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(softirq)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(steal)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest_nice)); + seq_put_decimal_ull(p, "cpu ", cputime64_to_clock_t(user)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(nice)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(system)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(idle)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(iowait)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(irq)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(softirq)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(steal)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest_nice)); seq_putc(p, '\n'); for_each_online_cpu(i) { @@ -141,23 +140,23 @@ static int show_stat(struct seq_file *p, void *v) guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST]; guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE]; seq_printf(p, "cpu%d", i); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(user)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(nice)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(system)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(idle)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(iowait)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(irq)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(softirq)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(steal)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest)); - seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest_nice)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(user)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(nice)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(system)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(idle)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(iowait)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(irq)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(softirq)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(steal)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest)); + seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest_nice)); seq_putc(p, '\n'); } - seq_printf(p, "intr %llu", (unsigned long long)sum); + seq_put_decimal_ull(p, "intr ", (unsigned long long)sum); /* sum again ? it could be updated? */ for_each_irq_nr(j) - seq_put_decimal_ull(p, ' ', kstat_irqs_usr(j)); + seq_put_decimal_ull(p, " ", kstat_irqs_usr(j)); seq_printf(p, "\nctxt %llu\n" @@ -171,10 +170,10 @@ static int show_stat(struct seq_file *p, void *v) nr_running(), nr_iowait()); - seq_printf(p, "softirq %llu", (unsigned long long)sum_softirq); + seq_put_decimal_ull(p, "softirq ", (unsigned long long)sum_softirq); for (i = 0; i < NR_SOFTIRQS; i++) - seq_put_decimal_ull(p, ' ', per_softirq_sums[i]); + seq_put_decimal_ull(p, " ", per_softirq_sums[i]); seq_putc(p, '\n'); return 0; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index f6fa99eca515..958f32545064 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -147,7 +147,7 @@ m_next_vma(struct proc_maps_private *priv, struct vm_area_struct *vma) static void m_cache_vma(struct seq_file *m, struct vm_area_struct *vma) { if (m->count < m->size) /* vma is copied successfully */ - m->version = m_next_vma(m->private, vma) ? vma->vm_start : -1UL; + m->version = m_next_vma(m->private, vma) ? vma->vm_end : -1UL; } static void *m_start(struct seq_file *m, loff_t *ppos) @@ -175,8 +175,10 @@ static void *m_start(struct seq_file *m, loff_t *ppos) priv->tail_vma = get_gate_vma(mm); if (last_addr) { - vma = find_vma(mm, last_addr); - if (vma && (vma = m_next_vma(priv, vma))) + vma = find_vma(mm, last_addr - 1); + if (vma && vma->vm_start <= last_addr) + vma = m_next_vma(priv, vma); + if (vma) return vma; } @@ -264,24 +266,15 @@ static int do_maps_open(struct inode *inode, struct file *file, * /proc/PID/maps that is the stack of the main task. */ static int is_stack(struct proc_maps_private *priv, - struct vm_area_struct *vma, int is_pid) + struct vm_area_struct *vma) { - int stack = 0; - - if (is_pid) { - stack = vma->vm_start <= vma->vm_mm->start_stack && - vma->vm_end >= vma->vm_mm->start_stack; - } else { - struct inode *inode = priv->inode; - struct task_struct *task; - - rcu_read_lock(); - task = pid_task(proc_pid(inode), PIDTYPE_PID); - if (task) - stack = vma_is_stack_for_task(vma, task); - rcu_read_unlock(); - } - return stack; + /* + * We make no effort to guess what a given thread considers to be + * its "stack". It's not even well-defined for programs written + * languages like Go. + */ + return vma->vm_start <= vma->vm_mm->start_stack && + vma->vm_end >= vma->vm_mm->start_stack; } static void @@ -352,7 +345,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) goto done; } - if (is_stack(priv, vma, is_pid)) + if (is_stack(priv, vma)) name = "[stack]"; } @@ -1070,7 +1063,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, } mmu_notifier_invalidate_range_start(mm, 0, -1); } - walk_page_range(0, ~0UL, &clear_refs_walk); + walk_page_range(0, mm->highest_vm_end, &clear_refs_walk); if (type == CLEAR_REFS_SOFT_DIRTY) mmu_notifier_invalidate_range_end(mm, 0, -1); flush_tlb_mm(mm); @@ -1595,6 +1588,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, } while (pte++, addr += PAGE_SIZE, addr != end); pte_unmap_unlock(orig_pte, ptl); + cond_resched(); return 0; } #ifdef CONFIG_HUGETLB_PAGE @@ -1667,7 +1661,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) seq_file_path(m, file, "\n\t= "); } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { seq_puts(m, " heap"); - } else if (is_stack(proc_priv, vma, is_pid)) { + } else if (is_stack(proc_priv, vma)) { seq_puts(m, " stack"); } diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index faacb0c0d857..37175621e890 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -124,25 +124,17 @@ unsigned long task_statm(struct mm_struct *mm, } static int is_stack(struct proc_maps_private *priv, - struct vm_area_struct *vma, int is_pid) + struct vm_area_struct *vma) { struct mm_struct *mm = vma->vm_mm; - int stack = 0; - - if (is_pid) { - stack = vma->vm_start <= mm->start_stack && - vma->vm_end >= mm->start_stack; - } else { - struct inode *inode = priv->inode; - struct task_struct *task; - - rcu_read_lock(); - task = pid_task(proc_pid(inode), PIDTYPE_PID); - if (task) - stack = vma_is_stack_for_task(vma, task); - rcu_read_unlock(); - } - return stack; + + /* + * We make no effort to guess what a given thread considers to be + * its "stack". It's not even well-defined for programs written + * languages like Go. + */ + return vma->vm_start <= mm->start_stack && + vma->vm_end >= mm->start_stack; } /* @@ -184,7 +176,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, if (file) { seq_pad(m, ' '); seq_file_path(m, file, ""); - } else if (mm && is_stack(priv, vma, is_pid)) { + } else if (mm && is_stack(priv, vma)) { seq_pad(m, ' '); seq_printf(m, "[stack]"); } diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c index e58a31e8fb2a..595b90a9766c 100644 --- a/fs/proc/thread_self.c +++ b/fs/proc/thread_self.c @@ -58,7 +58,7 @@ int proc_setup_thread_self(struct super_block *s) struct inode *inode = new_inode_pseudo(s); if (inode) { inode->i_ino = thread_self_inum; - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); inode->i_mode = S_IFLNK | S_IRWXUGO; inode->i_uid = GLOBAL_ROOT_UID; inode->i_gid = GLOBAL_ROOT_GID; |