summaryrefslogtreecommitdiffstats
path: root/fs/proc/base.c
diff options
context:
space:
mode:
authorJiri Kosina <jkosina@suse.cz>2020-09-01 14:19:48 +0200
committerJiri Kosina <jkosina@suse.cz>2020-09-01 14:19:48 +0200
commitead5d1f4d877e92c051e1a1ade623d0d30e71619 (patch)
treecb9db5698a546e7b96f7d5bef5ce544629dd37a2 /fs/proc/base.c
parentscif: Fix spelling of EACCES (diff)
parentMerge tag 'docs-5.9-3' of git://git.lwn.net/linux (diff)
downloadlinux-ead5d1f4d877e92c051e1a1ade623d0d30e71619.tar.xz
linux-ead5d1f4d877e92c051e1a1ade623d0d30e71619.zip
Merge branch 'master' into for-next
Sync with Linus' branch in order to be able to apply fixups of more recent patches.
Diffstat (limited to 'fs/proc/base.c')
-rw-r--r--fs/proc/base.c264
1 files changed, 133 insertions, 131 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c
index c7c64272b0fa..617db4e0faa0 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -405,11 +405,11 @@ print0:
static int lock_trace(struct task_struct *task)
{
- int err = mutex_lock_killable(&task->signal->cred_guard_mutex);
+ int err = mutex_lock_killable(&task->signal->exec_update_mutex);
if (err)
return err;
if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) {
- mutex_unlock(&task->signal->cred_guard_mutex);
+ mutex_unlock(&task->signal->exec_update_mutex);
return -EPERM;
}
return 0;
@@ -417,7 +417,7 @@ static int lock_trace(struct task_struct *task)
static void unlock_trace(struct task_struct *task)
{
- mutex_unlock(&task->signal->cred_guard_mutex);
+ mutex_unlock(&task->signal->exec_update_mutex);
}
#ifdef CONFIG_STACKTRACE
@@ -551,8 +551,17 @@ static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns,
{
unsigned long totalpages = totalram_pages() + total_swap_pages;
unsigned long points = 0;
+ long badness;
+
+ badness = oom_badness(task, totalpages);
+ /*
+ * Special case OOM_SCORE_ADJ_MIN for all others scale the
+ * badness value into [0, 2000] range which we have been
+ * exporting for a long time so userspace might depend on it.
+ */
+ if (badness != LONG_MIN)
+ points = (1000 + badness * 1000 / (long)totalpages) * 2 / 3;
- points = oom_badness(task, totalpages) * 1000 / totalpages;
seq_printf(m, "%lu\n", points);
return 0;
@@ -697,13 +706,21 @@ int proc_setattr(struct dentry *dentry, struct iattr *attr)
* May current process learn task's sched/cmdline info (for hide_pid_min=1)
* or euid/egid (for hide_pid_min=2)?
*/
-static bool has_pid_permissions(struct pid_namespace *pid,
+static bool has_pid_permissions(struct proc_fs_info *fs_info,
struct task_struct *task,
- int hide_pid_min)
+ enum proc_hidepid hide_pid_min)
{
- if (pid->hide_pid < hide_pid_min)
+ /*
+ * If 'hidpid' mount option is set force a ptrace check,
+ * we indicate that we are using a filesystem syscall
+ * by passing PTRACE_MODE_READ_FSCREDS
+ */
+ if (fs_info->hide_pid == HIDEPID_NOT_PTRACEABLE)
+ return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
+
+ if (fs_info->hide_pid < hide_pid_min)
return true;
- if (in_group_p(pid->pid_gid))
+ if (in_group_p(fs_info->pid_gid))
return true;
return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
}
@@ -711,18 +728,18 @@ static bool has_pid_permissions(struct pid_namespace *pid,
static int proc_pid_permission(struct inode *inode, int mask)
{
- struct pid_namespace *pid = proc_pid_ns(inode);
+ struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
struct task_struct *task;
bool has_perms;
task = get_proc_task(inode);
if (!task)
return -ESRCH;
- has_perms = has_pid_permissions(pid, task, HIDEPID_NO_ACCESS);
+ has_perms = has_pid_permissions(fs_info, task, HIDEPID_NO_ACCESS);
put_task_struct(task);
if (!has_perms) {
- if (pid->hide_pid == HIDEPID_INVISIBLE) {
+ if (fs_info->hide_pid == HIDEPID_INVISIBLE) {
/*
* Let's make getdents(), stat(), and open()
* consistent with each other. If a process
@@ -746,7 +763,7 @@ static const struct inode_operations proc_def_inode_operations = {
static int proc_single_show(struct seq_file *m, void *v)
{
struct inode *inode = m->private;
- struct pid_namespace *ns = proc_pid_ns(inode);
+ struct pid_namespace *ns = proc_pid_ns(inode->i_sb);
struct pid *pid = proc_pid(inode);
struct task_struct *task;
int ret;
@@ -1415,7 +1432,7 @@ static const struct file_operations proc_fail_nth_operations = {
static int sched_show(struct seq_file *m, void *v)
{
struct inode *inode = m->private;
- struct pid_namespace *ns = proc_pid_ns(inode);
+ struct pid_namespace *ns = proc_pid_ns(inode->i_sb);
struct task_struct *p;
p = get_proc_task(inode);
@@ -1573,6 +1590,7 @@ static ssize_t timens_offsets_write(struct file *file, const char __user *buf,
noffsets = 0;
for (pos = kbuf; pos; pos = next_line) {
struct proc_timens_offset *off = &offsets[noffsets];
+ char clock[10];
int err;
/* Find the end of line and ensure we don't look past it */
@@ -1584,10 +1602,21 @@ static ssize_t timens_offsets_write(struct file *file, const char __user *buf,
next_line = NULL;
}
- err = sscanf(pos, "%u %lld %lu", &off->clockid,
+ err = sscanf(pos, "%9s %lld %lu", clock,
&off->val.tv_sec, &off->val.tv_nsec);
if (err != 3 || off->val.tv_nsec >= NSEC_PER_SEC)
goto out;
+
+ clock[sizeof(clock) - 1] = 0;
+ if (strcmp(clock, "monotonic") == 0 ||
+ strcmp(clock, __stringify(CLOCK_MONOTONIC)) == 0)
+ off->clockid = CLOCK_MONOTONIC;
+ else if (strcmp(clock, "boottime") == 0 ||
+ strcmp(clock, __stringify(CLOCK_BOOTTIME)) == 0)
+ off->clockid = CLOCK_BOOTTIME;
+ else
+ goto out;
+
noffsets++;
if (noffsets == ARRAY_SIZE(offsets)) {
if (next_line)
@@ -1834,11 +1863,25 @@ void task_dump_owner(struct task_struct *task, umode_t mode,
*rgid = gid;
}
+void proc_pid_evict_inode(struct proc_inode *ei)
+{
+ struct pid *pid = ei->pid;
+
+ if (S_ISDIR(ei->vfs_inode.i_mode)) {
+ spin_lock(&pid->lock);
+ hlist_del_init_rcu(&ei->sibling_inodes);
+ spin_unlock(&pid->lock);
+ }
+
+ put_pid(pid);
+}
+
struct inode *proc_pid_make_inode(struct super_block * sb,
struct task_struct *task, umode_t mode)
{
struct inode * inode;
struct proc_inode *ei;
+ struct pid *pid;
/* We need a new inode */
@@ -1856,10 +1899,18 @@ struct inode *proc_pid_make_inode(struct super_block * sb,
/*
* grab the reference to task.
*/
- ei->pid = get_task_pid(task, PIDTYPE_PID);
- if (!ei->pid)
+ pid = get_task_pid(task, PIDTYPE_PID);
+ if (!pid)
goto out_unlock;
+ /* Let the pid remember us for quick removal */
+ ei->pid = pid;
+ if (S_ISDIR(mode)) {
+ spin_lock(&pid->lock);
+ hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes);
+ spin_unlock(&pid->lock);
+ }
+
task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
security_task_to_inode(task, inode);
@@ -1875,7 +1926,7 @@ int pid_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
- struct pid_namespace *pid = proc_pid_ns(inode);
+ struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
struct task_struct *task;
generic_fillattr(inode, stat);
@@ -1885,7 +1936,7 @@ int pid_getattr(const struct path *path, struct kstat *stat,
rcu_read_lock();
task = pid_task(proc_pid(inode), PIDTYPE_PID);
if (task) {
- if (!has_pid_permissions(pid, task, HIDEPID_INVISIBLE)) {
+ if (!has_pid_permissions(fs_info, task, HIDEPID_INVISIBLE)) {
rcu_read_unlock();
/*
* This doesn't prevent learning whether PID exists,
@@ -2070,11 +2121,11 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
goto out;
if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) {
- status = down_read_killable(&mm->mmap_sem);
+ status = mmap_read_lock_killable(mm);
if (!status) {
exact_vma_exists = !!find_exact_vma(mm, vm_start,
vm_end);
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
}
}
@@ -2121,7 +2172,7 @@ static int map_files_get_link(struct dentry *dentry, struct path *path)
if (rc)
goto out_mmput;
- rc = down_read_killable(&mm->mmap_sem);
+ rc = mmap_read_lock_killable(mm);
if (rc)
goto out_mmput;
@@ -2132,7 +2183,7 @@ static int map_files_get_link(struct dentry *dentry, struct path *path)
path_get(path);
rc = 0;
}
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
out_mmput:
mmput(mm);
@@ -2147,16 +2198,16 @@ struct map_files_info {
};
/*
- * Only allow CAP_SYS_ADMIN to follow the links, due to concerns about how the
- * symlinks may be used to bypass permissions on ancestor directories in the
- * path to the file in question.
+ * Only allow CAP_SYS_ADMIN and CAP_CHECKPOINT_RESTORE to follow the links, due
+ * to concerns about how the symlinks may be used to bypass permissions on
+ * ancestor directories in the path to the file in question.
*/
static const char *
proc_map_files_get_link(struct dentry *dentry,
struct inode *inode,
struct delayed_call *done)
{
- if (!capable(CAP_SYS_ADMIN))
+ if (!checkpoint_restore_ns_capable(&init_user_ns))
return ERR_PTR(-EPERM);
return proc_pid_get_link(dentry, inode, done);
@@ -2222,7 +2273,7 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
goto out_put_task;
result = ERR_PTR(-EINTR);
- if (down_read_killable(&mm->mmap_sem))
+ if (mmap_read_lock_killable(mm))
goto out_put_mm;
result = ERR_PTR(-ENOENT);
@@ -2235,7 +2286,7 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
(void *)(unsigned long)vma->vm_file->f_mode);
out_no_vma:
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
out_put_mm:
mmput(mm);
out_put_task:
@@ -2280,7 +2331,7 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx)
if (!mm)
goto out_put_task;
- ret = down_read_killable(&mm->mmap_sem);
+ ret = mmap_read_lock_killable(mm);
if (ret) {
mmput(mm);
goto out_put_task;
@@ -2291,11 +2342,11 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx)
/*
* We need two passes here:
*
- * 1) Collect vmas of mapped files with mmap_sem taken
- * 2) Release mmap_sem and instantiate entries
+ * 1) Collect vmas of mapped files with mmap_lock taken
+ * 2) Release mmap_lock and instantiate entries
*
* otherwise we get lockdep complained, since filldir()
- * routine might require mmap_sem taken in might_fault().
+ * routine might require mmap_lock taken in might_fault().
*/
for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) {
@@ -2307,7 +2358,7 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx)
p = genradix_ptr_alloc(&fa, nr_files++, GFP_KERNEL);
if (!p) {
ret = -ENOMEM;
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
mmput(mm);
goto out_put_task;
}
@@ -2316,7 +2367,7 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx)
p->end = vma->vm_end;
p->mode = vma->vm_file->f_mode;
}
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
mmput(mm);
for (i = 0; i < nr_files; i++) {
@@ -2436,7 +2487,7 @@ static int proc_timers_open(struct inode *inode, struct file *file)
return -ENOMEM;
tp->pid = proc_pid(inode);
- tp->ns = proc_pid_ns(inode);
+ tp->ns = proc_pid_ns(inode->i_sb);
return 0;
}
@@ -2736,6 +2787,15 @@ static const struct pid_entry smack_attr_dir_stuff[] = {
LSM_DIR_OPS(smack);
#endif
+#ifdef CONFIG_SECURITY_APPARMOR
+static const struct pid_entry apparmor_attr_dir_stuff[] = {
+ ATTR("apparmor", "current", 0666),
+ ATTR("apparmor", "prev", 0444),
+ ATTR("apparmor", "exec", 0666),
+};
+LSM_DIR_OPS(apparmor);
+#endif
+
static const struct pid_entry attr_dir_stuff[] = {
ATTR(NULL, "current", 0666),
ATTR(NULL, "prev", 0444),
@@ -2747,6 +2807,10 @@ static const struct pid_entry attr_dir_stuff[] = {
DIR("smack", 0555,
proc_smack_attr_dir_inode_ops, proc_smack_attr_dir_ops),
#endif
+#ifdef CONFIG_SECURITY_APPARMOR
+ DIR("apparmor", 0555,
+ proc_apparmor_attr_dir_inode_ops, proc_apparmor_attr_dir_ops),
+#endif
};
static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx)
@@ -2861,7 +2925,7 @@ static int do_io_accounting(struct task_struct *task, struct seq_file *m, int wh
unsigned long flags;
int result;
- result = mutex_lock_killable(&task->signal->cred_guard_mutex);
+ result = mutex_lock_killable(&task->signal->exec_update_mutex);
if (result)
return result;
@@ -2897,7 +2961,7 @@ static int do_io_accounting(struct task_struct *task, struct seq_file *m, int wh
result = 0;
out_unlock:
- mutex_unlock(&task->signal->cred_guard_mutex);
+ mutex_unlock(&task->signal->exec_update_mutex);
return result;
}
@@ -3230,90 +3294,28 @@ static const struct inode_operations proc_tgid_base_inode_operations = {
.permission = proc_pid_permission,
};
-static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
-{
- struct dentry *dentry, *leader, *dir;
- char buf[10 + 1];
- struct qstr name;
-
- name.name = buf;
- name.len = snprintf(buf, sizeof(buf), "%u", pid);
- /* no ->d_hash() rejects on procfs */
- dentry = d_hash_and_lookup(mnt->mnt_root, &name);
- if (dentry) {
- d_invalidate(dentry);
- dput(dentry);
- }
-
- if (pid == tgid)
- return;
-
- name.name = buf;
- name.len = snprintf(buf, sizeof(buf), "%u", tgid);
- leader = d_hash_and_lookup(mnt->mnt_root, &name);
- if (!leader)
- goto out;
-
- name.name = "task";
- name.len = strlen(name.name);
- dir = d_hash_and_lookup(leader, &name);
- if (!dir)
- goto out_put_leader;
-
- name.name = buf;
- name.len = snprintf(buf, sizeof(buf), "%u", pid);
- dentry = d_hash_and_lookup(dir, &name);
- if (dentry) {
- d_invalidate(dentry);
- dput(dentry);
- }
-
- dput(dir);
-out_put_leader:
- dput(leader);
-out:
- return;
-}
-
/**
- * proc_flush_task - Remove dcache entries for @task from the /proc dcache.
- * @task: task that should be flushed.
- *
- * When flushing dentries from proc, one needs to flush them from global
- * proc (proc_mnt) and from all the namespaces' procs this task was seen
- * in. This call is supposed to do all of this job.
+ * proc_flush_pid - Remove dcache entries for @pid from the /proc dcache.
+ * @pid: pid that should be flushed.
*
- * Looks in the dcache for
- * /proc/@pid
- * /proc/@tgid/task/@pid
- * if either directory is present flushes it and all of it'ts children
- * from the dcache.
+ * This function walks a list of inodes (that belong to any proc
+ * filesystem) that are attached to the pid and flushes them from
+ * the dentry cache.
*
* It is safe and reasonable to cache /proc entries for a task until
* that task exits. After that they just clog up the dcache with
* useless entries, possibly causing useful dcache entries to be
- * flushed instead. This routine is proved to flush those useless
- * dcache entries at process exit time.
+ * flushed instead. This routine is provided to flush those useless
+ * dcache entries when a process is reaped.
*
* NOTE: This routine is just an optimization so it does not guarantee
- * that no dcache entries will exist at process exit time it
- * just makes it very unlikely that any will persist.
+ * that no dcache entries will exist after a process is reaped
+ * it just makes it very unlikely that any will persist.
*/
-void proc_flush_task(struct task_struct *task)
+void proc_flush_pid(struct pid *pid)
{
- int i;
- struct pid *pid, *tgid;
- struct upid *upid;
-
- pid = task_pid(task);
- tgid = task_tgid(task);
-
- for (i = 0; i <= pid->level; i++) {
- upid = &pid->numbers[i];
- proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
- tgid->numbers[i].nr);
- }
+ proc_invalidate_siblings_dcache(&pid->inodes, &pid->lock);
}
static struct dentry *proc_pid_instantiate(struct dentry * dentry,
@@ -3340,6 +3342,7 @@ struct dentry *proc_pid_lookup(struct dentry *dentry, unsigned int flags)
{
struct task_struct *task;
unsigned tgid;
+ struct proc_fs_info *fs_info;
struct pid_namespace *ns;
struct dentry *result = ERR_PTR(-ENOENT);
@@ -3347,7 +3350,8 @@ struct dentry *proc_pid_lookup(struct dentry *dentry, unsigned int flags)
if (tgid == ~0U)
goto out;
- ns = dentry->d_sb->s_fs_info;
+ fs_info = proc_sb_info(dentry->d_sb);
+ ns = fs_info->pid_ns;
rcu_read_lock();
task = find_task_by_pid_ns(tgid, ns);
if (task)
@@ -3356,7 +3360,14 @@ struct dentry *proc_pid_lookup(struct dentry *dentry, unsigned int flags)
if (!task)
goto out;
+ /* Limit procfs to only ptraceable tasks */
+ if (fs_info->hide_pid == HIDEPID_NOT_PTRACEABLE) {
+ if (!has_pid_permissions(fs_info, task, HIDEPID_NO_ACCESS))
+ goto out_put_task;
+ }
+
result = proc_pid_instantiate(dentry, task, NULL);
+out_put_task:
put_task_struct(task);
out:
return result;
@@ -3382,20 +3393,8 @@ retry:
pid = find_ge_pid(iter.tgid, ns);
if (pid) {
iter.tgid = pid_nr_ns(pid, ns);
- iter.task = pid_task(pid, PIDTYPE_PID);
- /* What we to know is if the pid we have find is the
- * pid of a thread_group_leader. Testing for task
- * being a thread_group_leader is the obvious thing
- * todo but there is a window when it fails, due to
- * the pid transfer logic in de_thread.
- *
- * So we perform the straight forward test of seeing
- * if the pid we have found is the pid of a thread
- * group leader, and don't worry if the task we have
- * found doesn't happen to be a thread group leader.
- * As we don't care in the case of readdir.
- */
- if (!iter.task || !has_group_leader_pid(iter.task)) {
+ iter.task = pid_task(pid, PIDTYPE_TGID);
+ if (!iter.task) {
iter.tgid += 1;
goto retry;
}
@@ -3411,20 +3410,21 @@ retry:
int proc_pid_readdir(struct file *file, struct dir_context *ctx)
{
struct tgid_iter iter;
- struct pid_namespace *ns = proc_pid_ns(file_inode(file));
+ struct proc_fs_info *fs_info = proc_sb_info(file_inode(file)->i_sb);
+ struct pid_namespace *ns = proc_pid_ns(file_inode(file)->i_sb);
loff_t pos = ctx->pos;
if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
return 0;
if (pos == TGID_OFFSET - 2) {
- struct inode *inode = d_inode(ns->proc_self);
+ struct inode *inode = d_inode(fs_info->proc_self);
if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK))
return 0;
ctx->pos = pos = pos + 1;
}
if (pos == TGID_OFFSET - 1) {
- struct inode *inode = d_inode(ns->proc_thread_self);
+ struct inode *inode = d_inode(fs_info->proc_thread_self);
if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK))
return 0;
ctx->pos = pos = pos + 1;
@@ -3438,7 +3438,7 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
unsigned int len;
cond_resched();
- if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE))
+ if (!has_pid_permissions(fs_info, iter.task, HIDEPID_INVISIBLE))
continue;
len = snprintf(name, sizeof(name), "%u", iter.tgid);
@@ -3638,6 +3638,7 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
struct task_struct *task;
struct task_struct *leader = get_proc_task(dir);
unsigned tid;
+ struct proc_fs_info *fs_info;
struct pid_namespace *ns;
struct dentry *result = ERR_PTR(-ENOENT);
@@ -3648,7 +3649,8 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
if (tid == ~0U)
goto out;
- ns = dentry->d_sb->s_fs_info;
+ fs_info = proc_sb_info(dentry->d_sb);
+ ns = fs_info->pid_ns;
rcu_read_lock();
task = find_task_by_pid_ns(tid, ns);
if (task)
@@ -3762,7 +3764,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx)
/* f_version caches the tgid value that the last readdir call couldn't
* return. lseek aka telldir automagically resets f_version to 0.
*/
- ns = proc_pid_ns(inode);
+ ns = proc_pid_ns(inode->i_sb);
tid = (int)file->f_version;
file->f_version = 0;
for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns);