diff options
Diffstat (limited to 'fs/proc')
-rw-r--r-- | fs/proc/base.c | 102 | ||||
-rw-r--r-- | fs/proc/fd.c | 12 | ||||
-rw-r--r-- | fs/proc/inode.c | 3 | ||||
-rw-r--r-- | fs/proc/internal.h | 23 | ||||
-rw-r--r-- | fs/proc/proc_sysctl.c | 66 |
5 files changed, 117 insertions, 89 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c index 3d773eb9e144..b73b4de8fb36 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1667,12 +1667,63 @@ const struct inode_operations proc_pid_link_inode_operations = { /* building an inode */ +void task_dump_owner(struct task_struct *task, mode_t mode, + kuid_t *ruid, kgid_t *rgid) +{ + /* Depending on the state of dumpable compute who should own a + * proc file for a task. + */ + const struct cred *cred; + kuid_t uid; + kgid_t gid; + + /* Default to the tasks effective ownership */ + rcu_read_lock(); + cred = __task_cred(task); + uid = cred->euid; + gid = cred->egid; + rcu_read_unlock(); + + /* + * Before the /proc/pid/status file was created the only way to read + * the effective uid of a /process was to stat /proc/pid. Reading + * /proc/pid/status is slow enough that procps and other packages + * kept stating /proc/pid. To keep the rules in /proc simple I have + * made this apply to all per process world readable and executable + * directories. + */ + if (mode != (S_IFDIR|S_IRUGO|S_IXUGO)) { + struct mm_struct *mm; + task_lock(task); + mm = task->mm; + /* Make non-dumpable tasks owned by some root */ + if (mm) { + if (get_dumpable(mm) != SUID_DUMP_USER) { + struct user_namespace *user_ns = mm->user_ns; + + uid = make_kuid(user_ns, 0); + if (!uid_valid(uid)) + uid = GLOBAL_ROOT_UID; + + gid = make_kgid(user_ns, 0); + if (!gid_valid(gid)) + gid = GLOBAL_ROOT_GID; + } + } else { + uid = GLOBAL_ROOT_UID; + gid = GLOBAL_ROOT_GID; + } + task_unlock(task); + } + *ruid = uid; + *rgid = gid; +} + struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task, umode_t mode) { struct inode * inode; struct proc_inode *ei; - const struct cred *cred; /* We need a new inode */ @@ -1694,13 +1745,7 @@ struct inode *proc_pid_make_inode(struct super_block * sb, if (!ei->pid) goto out_unlock; - if (task_dumpable(task)) { - rcu_read_lock(); - cred = __task_cred(task); - inode->i_uid = cred->euid; - inode->i_gid = cred->egid; - rcu_read_unlock(); - } + task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); security_task_to_inode(task, inode); out: @@ -1715,7 +1760,6 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode = d_inode(dentry); struct task_struct *task; - const struct cred *cred; struct pid_namespace *pid = dentry->d_sb->s_fs_info; generic_fillattr(inode, stat); @@ -1733,12 +1777,7 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) */ return -ENOENT; } - if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || - task_dumpable(task)) { - cred = __task_cred(task); - stat->uid = cred->euid; - stat->gid = cred->egid; - } + task_dump_owner(task, inode->i_mode, &stat->uid, &stat->gid); } rcu_read_unlock(); return 0; @@ -1754,18 +1793,11 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) * Rewrite the inode's ownerships here because the owning task may have * performed a setuid(), etc. * - * Before the /proc/pid/status file was created the only way to read - * the effective uid of a /process was to stat /proc/pid. Reading - * /proc/pid/status is slow enough that procps and other packages - * kept stating /proc/pid. To keep the rules in /proc simple I have - * made this apply to all per process world readable and executable - * directories. */ int pid_revalidate(struct dentry *dentry, unsigned int flags) { struct inode *inode; struct task_struct *task; - const struct cred *cred; if (flags & LOOKUP_RCU) return -ECHILD; @@ -1774,17 +1806,8 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags) task = get_proc_task(inode); if (task) { - if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || - task_dumpable(task)) { - rcu_read_lock(); - cred = __task_cred(task); - inode->i_uid = cred->euid; - inode->i_gid = cred->egid; - rcu_read_unlock(); - } else { - inode->i_uid = GLOBAL_ROOT_UID; - inode->i_gid = GLOBAL_ROOT_GID; - } + task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid); + inode->i_mode &= ~(S_ISUID | S_ISGID); security_task_to_inode(task, inode); put_task_struct(task); @@ -1881,7 +1904,6 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags) bool exact_vma_exists = false; struct mm_struct *mm = NULL; struct task_struct *task; - const struct cred *cred; struct inode *inode; int status = 0; @@ -1906,16 +1928,8 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags) mmput(mm); if (exact_vma_exists) { - if (task_dumpable(task)) { - rcu_read_lock(); - cred = __task_cred(task); - inode->i_uid = cred->euid; - inode->i_gid = cred->egid; - rcu_read_unlock(); - } else { - inode->i_uid = GLOBAL_ROOT_UID; - inode->i_gid = GLOBAL_ROOT_GID; - } + task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); + security_task_to_inode(task, inode); status = 1; } diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 4274f83bf100..00ce1531b2f5 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -84,7 +84,6 @@ static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags) { struct files_struct *files; struct task_struct *task; - const struct cred *cred; struct inode *inode; unsigned int fd; @@ -108,16 +107,7 @@ static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags) rcu_read_unlock(); put_files_struct(files); - if (task_dumpable(task)) { - rcu_read_lock(); - cred = __task_cred(task); - inode->i_uid = cred->euid; - inode->i_gid = cred->egid; - rcu_read_unlock(); - } else { - inode->i_uid = GLOBAL_ROOT_UID; - inode->i_gid = GLOBAL_ROOT_GID; - } + task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); if (S_ISLNK(inode->i_mode)) { unsigned i_mode = S_IFLNK; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 842a5ff5b85c..7ad9ed7958af 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -43,10 +43,11 @@ static void proc_evict_inode(struct inode *inode) de = PDE(inode); if (de) pde_put(de); + head = PROC_I(inode)->sysctl; if (head) { RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL); - sysctl_head_put(head); + proc_sys_evict_inode(inode, head); } } diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 2de5194ba378..5d6960f5f1c0 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -65,6 +65,7 @@ struct proc_inode { struct proc_dir_entry *pde; struct ctl_table_header *sysctl; struct ctl_table *sysctl_entry; + struct list_head sysctl_inodes; const struct proc_ns_operations *ns_ops; struct inode vfs_inode; }; @@ -97,20 +98,8 @@ static inline struct task_struct *get_proc_task(struct inode *inode) return get_pid_task(proc_pid(inode), PIDTYPE_PID); } -static inline int task_dumpable(struct task_struct *task) -{ - int dumpable = 0; - struct mm_struct *mm; - - task_lock(task); - mm = task->mm; - if (mm) - dumpable = get_dumpable(mm); - task_unlock(task); - if (dumpable == SUID_DUMP_USER) - return 1; - return 0; -} +void task_dump_owner(struct task_struct *task, mode_t mode, + kuid_t *ruid, kgid_t *rgid); static inline unsigned name_to_int(const struct qstr *qstr) { @@ -249,10 +238,12 @@ extern void proc_thread_self_init(void); */ #ifdef CONFIG_PROC_SYSCTL extern int proc_sys_init(void); -extern void sysctl_head_put(struct ctl_table_header *); +extern void proc_sys_evict_inode(struct inode *inode, + struct ctl_table_header *head); #else static inline void proc_sys_init(void) { } -static inline void sysctl_head_put(struct ctl_table_header *head) { } +static inline void proc_sys_evict_inode(struct inode *inode, + struct ctl_table_header *head) { } #endif /* diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index d4e37acd4821..3e64c6502dc8 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -190,6 +190,7 @@ static void init_header(struct ctl_table_header *head, head->set = set; head->parent = NULL; head->node = node; + INIT_LIST_HEAD(&head->inodes); if (node) { struct ctl_table *entry; for (entry = table; entry->procname; entry++, node++) @@ -259,6 +260,27 @@ static void unuse_table(struct ctl_table_header *p) complete(p->unregistering); } +/* called under sysctl_lock */ +static void proc_sys_prune_dcache(struct ctl_table_header *head) +{ + struct inode *inode, *prev = NULL; + struct proc_inode *ei; + + rcu_read_lock(); + list_for_each_entry_rcu(ei, &head->inodes, sysctl_inodes) { + inode = igrab(&ei->vfs_inode); + if (inode) { + rcu_read_unlock(); + iput(prev); + prev = inode; + d_prune_aliases(inode); + rcu_read_lock(); + } + } + rcu_read_unlock(); + iput(prev); +} + /* called under sysctl_lock, will reacquire if has to wait */ static void start_unregistering(struct ctl_table_header *p) { @@ -272,31 +294,22 @@ static void start_unregistering(struct ctl_table_header *p) p->unregistering = &wait; spin_unlock(&sysctl_lock); wait_for_completion(&wait); - spin_lock(&sysctl_lock); } else { /* anything non-NULL; we'll never dereference it */ p->unregistering = ERR_PTR(-EINVAL); + spin_unlock(&sysctl_lock); } /* + * Prune dentries for unregistered sysctls: namespaced sysctls + * can have duplicate names and contaminate dcache very badly. + */ + proc_sys_prune_dcache(p); + /* * do not remove from the list until nobody holds it; walking the * list in do_sysctl() relies on that. */ - erase_header(p); -} - -static void sysctl_head_get(struct ctl_table_header *head) -{ spin_lock(&sysctl_lock); - head->count++; - spin_unlock(&sysctl_lock); -} - -void sysctl_head_put(struct ctl_table_header *head) -{ - spin_lock(&sysctl_lock); - if (!--head->count) - kfree_rcu(head, rcu); - spin_unlock(&sysctl_lock); + erase_header(p); } static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head) @@ -440,10 +453,20 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, inode->i_ino = get_next_ino(); - sysctl_head_get(head); ei = PROC_I(inode); + + spin_lock(&sysctl_lock); + if (unlikely(head->unregistering)) { + spin_unlock(&sysctl_lock); + iput(inode); + inode = NULL; + goto out; + } ei->sysctl = head; ei->sysctl_entry = table; + list_add_rcu(&ei->sysctl_inodes, &head->inodes); + head->count++; + spin_unlock(&sysctl_lock); inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); inode->i_mode = table->mode; @@ -466,6 +489,15 @@ out: return inode; } +void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head) +{ + spin_lock(&sysctl_lock); + list_del_rcu(&PROC_I(inode)->sysctl_inodes); + if (!--head->count) + kfree_rcu(head, rcu); + spin_unlock(&sysctl_lock); +} + static struct ctl_table_header *grab_header(struct inode *inode) { struct ctl_table_header *head = PROC_I(inode)->sysctl; |