summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/attr.c11
-rw-r--r--fs/autofs4/autofs_i.h8
-rw-r--r--fs/autofs4/dev-ioctl.c4
-rw-r--r--fs/autofs4/inode.c24
-rw-r--r--fs/autofs4/waitq.c5
-rw-r--r--fs/exec.c9
-rw-r--r--fs/fuse/dev.c4
-rw-r--r--fs/fuse/dir.c20
-rw-r--r--fs/fuse/fuse_i.h4
-rw-r--r--fs/fuse/inode.c23
-rw-r--r--fs/hppfs/hppfs.c2
-rw-r--r--fs/mount.h3
-rw-r--r--fs/namespace.c211
-rw-r--r--fs/open.c2
-rw-r--r--fs/pnode.h1
-rw-r--r--fs/proc/Makefile1
-rw-r--r--fs/proc/array.c2
-rw-r--r--fs/proc/base.c169
-rw-r--r--fs/proc/generic.c26
-rw-r--r--fs/proc/inode.c6
-rw-r--r--fs/proc/internal.h1
-rw-r--r--fs/proc/namespaces.c185
-rw-r--r--fs/proc/root.c17
-rw-r--r--fs/proc/self.c59
-rw-r--r--fs/sysfs/mount.c1
25 files changed, 493 insertions, 305 deletions
diff --git a/fs/attr.c b/fs/attr.c
index cce7df53b694..1449adb14ef6 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -49,14 +49,15 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
/* Make sure a caller can chown. */
if ((ia_valid & ATTR_UID) &&
(!uid_eq(current_fsuid(), inode->i_uid) ||
- !uid_eq(attr->ia_uid, inode->i_uid)) && !capable(CAP_CHOWN))
+ !uid_eq(attr->ia_uid, inode->i_uid)) &&
+ !inode_capable(inode, CAP_CHOWN))
return -EPERM;
/* Make sure caller can chgrp. */
if ((ia_valid & ATTR_GID) &&
(!uid_eq(current_fsuid(), inode->i_uid) ||
(!in_group_p(attr->ia_gid) && !gid_eq(attr->ia_gid, inode->i_gid))) &&
- !capable(CAP_CHOWN))
+ !inode_capable(inode, CAP_CHOWN))
return -EPERM;
/* Make sure a caller can chmod. */
@@ -65,7 +66,8 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
return -EPERM;
/* Also check the setgid bit! */
if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
- inode->i_gid) && !capable(CAP_FSETID))
+ inode->i_gid) &&
+ !inode_capable(inode, CAP_FSETID))
attr->ia_mode &= ~S_ISGID;
}
@@ -157,7 +159,8 @@ void setattr_copy(struct inode *inode, const struct iattr *attr)
if (ia_valid & ATTR_MODE) {
umode_t mode = attr->ia_mode;
- if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
+ if (!in_group_p(inode->i_gid) &&
+ !inode_capable(inode, CAP_FSETID))
mode &= ~S_ISGID;
inode->i_mode = mode;
}
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 908e18455413..b785e7707959 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -74,8 +74,8 @@ struct autofs_info {
unsigned long last_used;
atomic_t count;
- uid_t uid;
- gid_t gid;
+ kuid_t uid;
+ kgid_t gid;
};
#define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */
@@ -89,8 +89,8 @@ struct autofs_wait_queue {
struct qstr name;
u32 dev;
u64 ino;
- uid_t uid;
- gid_t gid;
+ kuid_t uid;
+ kgid_t gid;
pid_t pid;
pid_t tgid;
/* This is for status reporting upon return */
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index a16214109d31..9f68a37bb2b2 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -437,8 +437,8 @@ static int autofs_dev_ioctl_requester(struct file *fp,
err = 0;
autofs4_expire_wait(path.dentry);
spin_lock(&sbi->fs_lock);
- param->requester.uid = ino->uid;
- param->requester.gid = ino->gid;
+ param->requester.uid = from_kuid_munged(current_user_ns(), ino->uid);
+ param->requester.gid = from_kgid_munged(current_user_ns(), ino->gid);
spin_unlock(&sbi->fs_lock);
}
path_put(&path);
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 8a4fed8ead30..b104726e2d0a 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -36,8 +36,8 @@ struct autofs_info *autofs4_new_ino(struct autofs_sb_info *sbi)
void autofs4_clean_ino(struct autofs_info *ino)
{
- ino->uid = 0;
- ino->gid = 0;
+ ino->uid = GLOBAL_ROOT_UID;
+ ino->gid = GLOBAL_ROOT_GID;
ino->last_used = jiffies;
}
@@ -79,10 +79,12 @@ static int autofs4_show_options(struct seq_file *m, struct dentry *root)
return 0;
seq_printf(m, ",fd=%d", sbi->pipefd);
- if (root_inode->i_uid != 0)
- seq_printf(m, ",uid=%u", root_inode->i_uid);
- if (root_inode->i_gid != 0)
- seq_printf(m, ",gid=%u", root_inode->i_gid);
+ if (!uid_eq(root_inode->i_uid, GLOBAL_ROOT_UID))
+ seq_printf(m, ",uid=%u",
+ from_kuid_munged(&init_user_ns, root_inode->i_uid));
+ if (!gid_eq(root_inode->i_gid, GLOBAL_ROOT_GID))
+ seq_printf(m, ",gid=%u",
+ from_kgid_munged(&init_user_ns, root_inode->i_gid));
seq_printf(m, ",pgrp=%d", sbi->oz_pgrp);
seq_printf(m, ",timeout=%lu", sbi->exp_timeout/HZ);
seq_printf(m, ",minproto=%d", sbi->min_proto);
@@ -126,7 +128,7 @@ static const match_table_t tokens = {
{Opt_err, NULL}
};
-static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
+static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid,
pid_t *pgrp, unsigned int *type, int *minproto, int *maxproto)
{
char *p;
@@ -159,12 +161,16 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
case Opt_uid:
if (match_int(args, &option))
return 1;
- *uid = option;
+ *uid = make_kuid(current_user_ns(), option);
+ if (!uid_valid(*uid))
+ return 1;
break;
case Opt_gid:
if (match_int(args, &option))
return 1;
- *gid = option;
+ *gid = make_kgid(current_user_ns(), option);
+ if (!gid_valid(*gid))
+ return 1;
break;
case Opt_pgrp:
if (match_int(args, &option))
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index dce436e595c1..03bc1d347d8e 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -154,6 +154,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
case autofs_ptype_expire_direct:
{
struct autofs_v5_packet *packet = &pkt.v5_pkt.v5_packet;
+ struct user_namespace *user_ns = sbi->pipe->f_cred->user_ns;
pktsz = sizeof(*packet);
@@ -163,8 +164,8 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
packet->name[wq->name.len] = '\0';
packet->dev = wq->dev;
packet->ino = wq->ino;
- packet->uid = wq->uid;
- packet->gid = wq->gid;
+ packet->uid = from_kuid_munged(user_ns, wq->uid);
+ packet->gid = from_kgid_munged(user_ns, wq->gid);
packet->pid = wq->pid;
packet->tgid = wq->tgid;
break;
diff --git a/fs/exec.c b/fs/exec.c
index 721a29929511..b71b08ce7120 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1266,14 +1266,13 @@ int prepare_binprm(struct linux_binprm *bprm)
bprm->cred->egid = current_egid();
if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) &&
- !current->no_new_privs) {
+ !current->no_new_privs &&
+ kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) &&
+ kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) {
/* Set-uid? */
if (mode & S_ISUID) {
- if (!kuid_has_mapping(bprm->cred->user_ns, inode->i_uid))
- return -EPERM;
bprm->per_clear |= PER_CLEAR_ON_SETID;
bprm->cred->euid = inode->i_uid;
-
}
/* Set-gid? */
@@ -1283,8 +1282,6 @@ int prepare_binprm(struct linux_binprm *bprm)
* executable.
*/
if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
- if (!kgid_has_mapping(bprm->cred->user_ns, inode->i_gid))
- return -EPERM;
bprm->per_clear |= PER_CLEAR_ON_SETID;
bprm->cred->egid = inode->i_gid;
}
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 8c23fa7a91e6..c16335315e5d 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -92,8 +92,8 @@ static void __fuse_put_request(struct fuse_req *req)
static void fuse_req_init_context(struct fuse_req *req)
{
- req->in.h.uid = current_fsuid();
- req->in.h.gid = current_fsgid();
+ req->in.h.uid = from_kuid_munged(&init_user_ns, current_fsuid());
+ req->in.h.gid = from_kgid_munged(&init_user_ns, current_fsgid());
req->in.h.pid = current->pid;
}
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 324bc0850534..b7c09f9eb40c 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -818,8 +818,8 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
stat->ino = attr->ino;
stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
stat->nlink = attr->nlink;
- stat->uid = attr->uid;
- stat->gid = attr->gid;
+ stat->uid = make_kuid(&init_user_ns, attr->uid);
+ stat->gid = make_kgid(&init_user_ns, attr->gid);
stat->rdev = inode->i_rdev;
stat->atime.tv_sec = attr->atime;
stat->atime.tv_nsec = attr->atimensec;
@@ -1007,12 +1007,12 @@ int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task)
rcu_read_lock();
ret = 0;
cred = __task_cred(task);
- if (cred->euid == fc->user_id &&
- cred->suid == fc->user_id &&
- cred->uid == fc->user_id &&
- cred->egid == fc->group_id &&
- cred->sgid == fc->group_id &&
- cred->gid == fc->group_id)
+ if (uid_eq(cred->euid, fc->user_id) &&
+ uid_eq(cred->suid, fc->user_id) &&
+ uid_eq(cred->uid, fc->user_id) &&
+ gid_eq(cred->egid, fc->group_id) &&
+ gid_eq(cred->sgid, fc->group_id) &&
+ gid_eq(cred->gid, fc->group_id))
ret = 1;
rcu_read_unlock();
@@ -1306,9 +1306,9 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg)
if (ivalid & ATTR_MODE)
arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode;
if (ivalid & ATTR_UID)
- arg->valid |= FATTR_UID, arg->uid = iattr->ia_uid;
+ arg->valid |= FATTR_UID, arg->uid = from_kuid(&init_user_ns, iattr->ia_uid);
if (ivalid & ATTR_GID)
- arg->valid |= FATTR_GID, arg->gid = iattr->ia_gid;
+ arg->valid |= FATTR_GID, arg->gid = from_kgid(&init_user_ns, iattr->ia_gid);
if (ivalid & ATTR_SIZE)
arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size;
if (ivalid & ATTR_ATIME) {
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index e24dd74e3068..e105a53fc72d 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -333,10 +333,10 @@ struct fuse_conn {
atomic_t count;
/** The user id for this mount */
- uid_t user_id;
+ kuid_t user_id;
/** The group id for this mount */
- gid_t group_id;
+ kgid_t group_id;
/** The fuse mount flags for this mount */
unsigned flags;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index f0eda124cffb..73ca6b72beaf 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -60,8 +60,8 @@ MODULE_PARM_DESC(max_user_congthresh,
struct fuse_mount_data {
int fd;
unsigned rootmode;
- unsigned user_id;
- unsigned group_id;
+ kuid_t user_id;
+ kgid_t group_id;
unsigned fd_present:1;
unsigned rootmode_present:1;
unsigned user_id_present:1;
@@ -164,8 +164,8 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
inode->i_ino = fuse_squash_ino(attr->ino);
inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
set_nlink(inode, attr->nlink);
- inode->i_uid = attr->uid;
- inode->i_gid = attr->gid;
+ inode->i_uid = make_kuid(&init_user_ns, attr->uid);
+ inode->i_gid = make_kgid(&init_user_ns, attr->gid);
inode->i_blocks = attr->blocks;
inode->i_atime.tv_sec = attr->atime;
inode->i_atime.tv_nsec = attr->atimensec;
@@ -492,14 +492,18 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
case OPT_USER_ID:
if (match_int(&args[0], &value))
return 0;
- d->user_id = value;
+ d->user_id = make_kuid(current_user_ns(), value);
+ if (!uid_valid(d->user_id))
+ return 0;
d->user_id_present = 1;
break;
case OPT_GROUP_ID:
if (match_int(&args[0], &value))
return 0;
- d->group_id = value;
+ d->group_id = make_kgid(current_user_ns(), value);
+ if (!gid_valid(d->group_id))
+ return 0;
d->group_id_present = 1;
break;
@@ -540,8 +544,8 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root)
struct super_block *sb = root->d_sb;
struct fuse_conn *fc = get_fuse_conn_super(sb);
- seq_printf(m, ",user_id=%u", fc->user_id);
- seq_printf(m, ",group_id=%u", fc->group_id);
+ seq_printf(m, ",user_id=%u", from_kuid_munged(&init_user_ns, fc->user_id));
+ seq_printf(m, ",group_id=%u", from_kgid_munged(&init_user_ns, fc->group_id));
if (fc->flags & FUSE_DEFAULT_PERMISSIONS)
seq_puts(m, ",default_permissions");
if (fc->flags & FUSE_ALLOW_OTHER)
@@ -989,7 +993,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
if (!file)
goto err;
- if (file->f_op != &fuse_dev_operations)
+ if ((file->f_op != &fuse_dev_operations) ||
+ (file->f_cred->user_ns != &init_user_ns))
goto err_fput;
fc = kmalloc(sizeof(*fc), GFP_KERNEL);
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 78f21f8dc2ec..43b315f2002b 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -710,7 +710,7 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent)
struct vfsmount *proc_mnt;
int err = -ENOENT;
- proc_mnt = mntget(current->nsproxy->pid_ns->proc_mnt);
+ proc_mnt = mntget(task_active_pid_ns(current)->proc_mnt);
if (IS_ERR(proc_mnt))
goto out;
diff --git a/fs/mount.h b/fs/mount.h
index 4f291f9de641..cd5007980400 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -4,8 +4,11 @@
struct mnt_namespace {
atomic_t count;
+ unsigned int proc_inum;
struct mount * root;
struct list_head list;
+ struct user_namespace *user_ns;
+ u64 seq; /* Sequence number to prevent loops */
wait_queue_head_t poll;
int event;
};
diff --git a/fs/namespace.c b/fs/namespace.c
index 24960626bb6b..c1bbe86f4920 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -12,6 +12,7 @@
#include <linux/export.h>
#include <linux/capability.h>
#include <linux/mnt_namespace.h>
+#include <linux/user_namespace.h>
#include <linux/namei.h>
#include <linux/security.h>
#include <linux/idr.h>
@@ -20,6 +21,7 @@
#include <linux/fs_struct.h> /* get_fs_root et.al. */
#include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */
#include <linux/uaccess.h>
+#include <linux/proc_fs.h>
#include "pnode.h"
#include "internal.h"
@@ -784,7 +786,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
if (!mnt)
return ERR_PTR(-ENOMEM);
- if (flag & (CL_SLAVE | CL_PRIVATE))
+ if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE))
mnt->mnt_group_id = 0; /* not a peer of original */
else
mnt->mnt_group_id = old->mnt_group_id;
@@ -805,7 +807,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
br_write_unlock(&vfsmount_lock);
- if (flag & CL_SLAVE) {
+ if ((flag & CL_SLAVE) ||
+ ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) {
list_add(&mnt->mnt_slave, &old->mnt_slave_list);
mnt->mnt_master = old;
CLEAR_MNT_SHARED(mnt);
@@ -1266,7 +1269,7 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
goto dput_and_out;
retval = -EPERM;
- if (!capable(CAP_SYS_ADMIN))
+ if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
goto dput_and_out;
retval = do_umount(mnt, flags);
@@ -1292,7 +1295,7 @@ SYSCALL_DEFINE1(oldumount, char __user *, name)
static int mount_is_safe(struct path *path)
{
- if (capable(CAP_SYS_ADMIN))
+ if (ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN))
return 0;
return -EPERM;
#ifdef notyet
@@ -1308,6 +1311,26 @@ static int mount_is_safe(struct path *path)
#endif
}
+static bool mnt_ns_loop(struct path *path)
+{
+ /* Could bind mounting the mount namespace inode cause a
+ * mount namespace loop?
+ */
+ struct inode *inode = path->dentry->d_inode;
+ struct proc_inode *ei;
+ struct mnt_namespace *mnt_ns;
+
+ if (!proc_ns_inode(inode))
+ return false;
+
+ ei = PROC_I(inode);
+ if (ei->ns_ops != &mntns_operations)
+ return false;
+
+ mnt_ns = ei->ns;
+ return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
+}
+
struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
int flag)
{
@@ -1610,7 +1633,7 @@ static int do_change_type(struct path *path, int flag)
int type;
int err = 0;
- if (!capable(CAP_SYS_ADMIN))
+ if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
return -EPERM;
if (path->dentry != path->mnt->mnt_root)
@@ -1655,6 +1678,10 @@ static int do_loopback(struct path *path, const char *old_name,
if (err)
return err;
+ err = -EINVAL;
+ if (mnt_ns_loop(&old_path))
+ goto out;
+
err = lock_mount(path);
if (err)
goto out;
@@ -1770,7 +1797,7 @@ static int do_move_mount(struct path *path, const char *old_name)
struct mount *p;
struct mount *old;
int err = 0;
- if (!capable(CAP_SYS_ADMIN))
+ if (!ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN))
return -EPERM;
if (!old_name || !*old_name)
return -EINVAL;
@@ -1857,21 +1884,6 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
return ERR_PTR(err);
}
-static struct vfsmount *
-do_kern_mount(const char *fstype, int flags, const char *name, void *data)
-{
- struct file_system_type *type = get_fs_type(fstype);
- struct vfsmount *mnt;
- if (!type)
- return ERR_PTR(-ENODEV);
- mnt = vfs_kern_mount(type, flags, name, data);
- if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
- !mnt->mnt_sb->s_subtype)
- mnt = fs_set_subtype(mnt, fstype);
- put_filesystem(type);
- return mnt;
-}
-
/*
* add a mount into a namespace's mount tree
*/
@@ -1917,20 +1929,46 @@ unlock:
* create a new mount for userspace and request it to be added into the
* namespace's tree
*/
-static int do_new_mount(struct path *path, const char *type, int flags,
+static int do_new_mount(struct path *path, const char *fstype, int flags,
int mnt_flags, const char *name, void *data)
{
+ struct file_system_type *type;
+ struct user_namespace *user_ns;
struct vfsmount *mnt;
int err;
- if (!type)
+ if (!fstype)
return -EINVAL;
/* we need capabilities... */
- if (!capable(CAP_SYS_ADMIN))
+ user_ns = real_mount(path->mnt)->mnt_ns->user_ns;
+ if (!ns_capable(user_ns, CAP_SYS_ADMIN))
return -EPERM;
- mnt = do_kern_mount(type, flags, name, data);
+ type = get_fs_type(fstype);
+ if (!type)
+ return -ENODEV;
+
+ if (user_ns != &init_user_ns) {
+ if (!(type->fs_flags & FS_USERNS_MOUNT)) {
+ put_filesystem(type);
+ return -EPERM;
+ }
+ /* Only in special cases allow devices from mounts
+ * created outside the initial user namespace.
+ */
+ if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) {
+ flags |= MS_NODEV;
+ mnt_flags |= MNT_NODEV;
+ }
+ }
+
+ mnt = vfs_kern_mount(type, flags, name, data);
+ if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
+ !mnt->mnt_sb->s_subtype)
+ mnt = fs_set_subtype(mnt, fstype);
+
+ put_filesystem(type);
if (IS_ERR(mnt))
return PTR_ERR(mnt);
@@ -2261,18 +2299,42 @@ dput_out:
return retval;
}
-static struct mnt_namespace *alloc_mnt_ns(void)
+static void free_mnt_ns(struct mnt_namespace *ns)
+{
+ proc_free_inum(ns->proc_inum);
+ put_user_ns(ns->user_ns);
+ kfree(ns);
+}
+
+/*
+ * Assign a sequence number so we can detect when we attempt to bind
+ * mount a reference to an older mount namespace into the current
+ * mount namespace, preventing reference counting loops. A 64bit
+ * number incrementing at 10Ghz will take 12,427 years to wrap which
+ * is effectively never, so we can ignore the possibility.
+ */
+static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
+
+static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
{
struct mnt_namespace *new_ns;
+ int ret;
new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
if (!new_ns)
return ERR_PTR(-ENOMEM);
+ ret = proc_alloc_inum(&new_ns->proc_inum);
+ if (ret) {
+ kfree(new_ns);
+ return ERR_PTR(ret);
+ }
+ new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
atomic_set(&new_ns->count, 1);
new_ns->root = NULL;
INIT_LIST_HEAD(&new_ns->list);
init_waitqueue_head(&new_ns->poll);
new_ns->event = 0;
+ new_ns->user_ns = get_user_ns(user_ns);
return new_ns;
}
@@ -2281,24 +2343,28 @@ static struct mnt_namespace *alloc_mnt_ns(void)
* copied from the namespace of the passed in task structure.
*/
static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
- struct fs_struct *fs)
+ struct user_namespace *user_ns, struct fs_struct *fs)
{
struct mnt_namespace *new_ns;
struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
struct mount *p, *q;
struct mount *old = mnt_ns->root;
struct mount *new;
+ int copy_flags;
- new_ns = alloc_mnt_ns();
+ new_ns = alloc_mnt_ns(user_ns);
if (IS_ERR(new_ns))
return new_ns;
down_write(&namespace_sem);
/* First pass: copy the tree topology */
- new = copy_tree(old, old->mnt.mnt_root, CL_COPY_ALL | CL_EXPIRE);
+ copy_flags = CL_COPY_ALL | CL_EXPIRE;
+ if (user_ns != mnt_ns->user_ns)
+ copy_flags |= CL_SHARED_TO_SLAVE;
+ new = copy_tree(old, old->mnt.mnt_root, copy_flags);
if (IS_ERR(new)) {
up_write(&namespace_sem);
- kfree(new_ns);
+ free_mnt_ns(new_ns);
return ERR_CAST(new);
}
new_ns->root = new;
@@ -2339,7 +2405,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
}
struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
- struct fs_struct *new_fs)
+ struct user_namespace *user_ns, struct fs_struct *new_fs)
{
struct mnt_namespace *new_ns;
@@ -2349,7 +2415,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
if (!(flags & CLONE_NEWNS))
return ns;
- new_ns = dup_mnt_ns(ns, new_fs);
+ new_ns = dup_mnt_ns(ns, user_ns, new_fs);
put_mnt_ns(ns);
return new_ns;
@@ -2361,7 +2427,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
*/
static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
{
- struct mnt_namespace *new_ns = alloc_mnt_ns();
+ struct mnt_namespace *new_ns = alloc_mnt_ns(&init_user_ns);
if (!IS_ERR(new_ns)) {
struct mount *mnt = real_mount(m);
mnt->mnt_ns = new_ns;
@@ -2501,7 +2567,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
struct mount *new_mnt, *root_mnt;
int error;
- if (!capable(CAP_SYS_ADMIN))
+ if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN))
return -EPERM;
error = user_path_dir(new_root, &new);
@@ -2583,8 +2649,13 @@ static void __init init_mount_tree(void)
struct vfsmount *mnt;
struct mnt_namespace *ns;
struct path root;
+ struct file_system_type *type;
- mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
+ type = get_fs_type("rootfs");
+ if (!type)
+ panic("Can't find rootfs type");
+ mnt = vfs_kern_mount(type, 0, "rootfs", NULL);
+ put_filesystem(type);
if (IS_ERR(mnt))
panic("Can't create rootfs");
@@ -2647,7 +2718,7 @@ void put_mnt_ns(struct mnt_namespace *ns)
br_write_unlock(&vfsmount_lock);
up_write(&namespace_sem);
release_mounts(&umount_list);
- kfree(ns);
+ free_mnt_ns(ns);
}
struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
@@ -2681,3 +2752,71 @@ bool our_mnt(struct vfsmount *mnt)
{
return check_mnt(real_mount(mnt));
}
+
+static void *mntns_get(struct task_struct *task)
+{
+ struct mnt_namespace *ns = NULL;
+ struct nsproxy *nsproxy;
+
+ rcu_read_lock();
+ nsproxy = task_nsproxy(task);
+ if (nsproxy) {
+ ns = nsproxy->mnt_ns;
+ get_mnt_ns(ns);
+ }
+ rcu_read_unlock();
+
+ return ns;
+}
+
+static void mntns_put(void *ns)
+{
+ put_mnt_ns(ns);
+}
+
+static int mntns_install(struct nsproxy *nsproxy, void *ns)
+{
+ struct fs_struct *fs = current->fs;
+ struct mnt_namespace *mnt_ns = ns;
+ struct path root;
+
+ if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
+ !nsown_capable(CAP_SYS_CHROOT))
+ return -EPERM;
+
+ if (fs->users != 1)
+ return -EINVAL;
+
+ get_mnt_ns(mnt_ns);
+ put_mnt_ns(nsproxy->mnt_ns);
+ nsproxy->mnt_ns = mnt_ns;
+
+ /* Find the root */
+ root.mnt = &mnt_ns->root->mnt;
+ root.dentry = mnt_ns->root->mnt.mnt_root;
+ path_get(&root);
+ while(d_mountpoint(root.dentry) && follow_down_one(&root))
+ ;
+
+ /* Update the pwd and root */
+ set_fs_pwd(fs, &root);
+ set_fs_root(fs, &root);
+
+ path_put(&root);
+ return 0;
+}
+
+static unsigned int mntns_inum(void *ns)
+{
+ struct mnt_namespace *mnt_ns = ns;
+ return mnt_ns->proc_inum;
+}
+
+const struct proc_ns_operations mntns_operations = {
+ .name = "mnt",
+ .type = CLONE_NEWNS,
+ .get = mntns_get,
+ .put = mntns_put,
+ .install = mntns_install,
+ .inum = mntns_inum,
+};
diff --git a/fs/open.c b/fs/open.c
index 59071f55bf7f..182d8667b7bd 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -435,7 +435,7 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename)
goto dput_and_out;
error = -EPERM;
- if (!capable(CAP_SYS_CHROOT))
+ if (!nsown_capable(CAP_SYS_CHROOT))
goto dput_and_out;
error = security_path_chroot(&path);
if (error)
diff --git a/fs/pnode.h b/fs/pnode.h
index 65c60979d541..19b853a3445c 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -22,6 +22,7 @@
#define CL_COPY_ALL 0x04
#define CL_MAKE_SHARED 0x08
#define CL_PRIVATE 0x10
+#define CL_SHARED_TO_SLAVE 0x20
static inline void set_mnt_shared(struct mount *mnt)
{
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 99349efbbc2b..981b05601931 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -21,6 +21,7 @@ proc-y += uptime.o
proc-y += version.o
proc-y += softirqs.o
proc-y += namespaces.o
+proc-y += self.o
proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o
proc-$(CONFIG_NET) += proc_net.o
proc-$(CONFIG_PROC_KCORE) += kcore.o
diff --git a/fs/proc/array.c b/fs/proc/array.c
index d3696708fc1a..d66248a1919b 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -162,7 +162,7 @@ static inline const char *get_task_state(struct task_struct *tsk)
static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *p)
{
- struct user_namespace *user_ns = current_user_ns();
+ struct user_namespace *user_ns = seq_user_ns(m);
struct group_info *group_info;
int g;
struct fdtable *fdt = NULL;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index aa63d25157b8..5a5a0be40e40 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2345,146 +2345,6 @@ static const struct file_operations proc_coredump_filter_operations = {
};
#endif
-/*
- * /proc/self:
- */
-static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
- int buflen)
-{
- struct pid_namespace *ns = dentry->d_sb->s_fs_info;
- pid_t tgid = task_tgid_nr_ns(current, ns);
- char tmp[PROC_NUMBUF];
- if (!tgid)
- return -ENOENT;
- sprintf(tmp, "%d", tgid);
- return vfs_readlink(dentry,buffer,buflen,tmp);
-}
-
-static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
- struct pid_namespace *ns = dentry->d_sb->s_fs_info;
- pid_t tgid = task_tgid_nr_ns(current, ns);
- char *name = ERR_PTR(-ENOENT);
- if (tgid) {
- /* 11 for max length of signed int in decimal + NULL term */
- name = kmalloc(12, GFP_KERNEL);
- if (!name)
- name = ERR_PTR(-ENOMEM);
- else
- sprintf(name, "%d", tgid);
- }
- nd_set_link(nd, name);
- return NULL;
-}
-
-static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd,
- void *cookie)
-{
- char *s = nd_get_link(nd);
- if (!IS_ERR(s))
- kfree(s);
-}
-
-static const struct inode_operations proc_self_inode_operations = {
- .readlink = proc_self_readlink,
- .follow_link = proc_self_follow_link,
- .put_link = proc_self_put_link,
-};
-
-/*
- * proc base
- *
- * These are the directory entries in the root directory of /proc
- * that properly belong to the /proc filesystem, as they describe
- * describe something that is process related.
- */
-static const struct pid_entry proc_base_stuff[] = {
- NOD("self", S_IFLNK|S_IRWXUGO,
- &proc_self_inode_operations, NULL, {}),
-};
-
-static struct dentry *proc_base_instantiate(struct inode *dir,
- struct dentry *dentry, struct task_struct *task, const void *ptr)
-{
- const struct pid_entry *p = ptr;
- struct inode *inode;
- struct proc_inode *ei;
- struct dentry *error;
-
- /* Allocate the inode */
- error = ERR_PTR(-ENOMEM);
- inode = new_inode(dir->i_sb);
- if (!inode)
- goto out;
-
- /* Initialize the inode */
- ei = PROC_I(inode);
- inode->i_ino = get_next_ino();
- inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
-
- /*
- * grab the reference to the task.
- */
- ei->pid = get_task_pid(task, PIDTYPE_PID);
- if (!ei->pid)
- goto out_iput;
-
- inode->i_mode = p->mode;
- if (S_ISDIR(inode->i_mode))
- set_nlink(inode, 2);
- if (S_ISLNK(inode->i_mode))
- inode->i_size = 64;
- if (p->iop)
- inode->i_op = p->iop;
- if (p->fop)
- inode->i_fop = p->fop;
- ei->op = p->op;
- d_add(dentry, inode);
- error = NULL;
-out:
- return error;
-out_iput:
- iput(inode);
- goto out;
-}
-
-static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
-{
- struct dentry *error;
- struct task_struct *task = get_proc_task(dir);
- const struct pid_entry *p, *last;
-
- error = ERR_PTR(-ENOENT);
-
- if (!task)
- goto out_no_task;
-
- /* Lookup the directory entry */
- last = &proc_base_stuff[ARRAY_SIZE(proc_base_stuff) - 1];
- for (p = proc_base_stuff; p <= last; p++) {
- if (p->len != dentry->d_name.len)
- continue;
- if (!memcmp(dentry->d_name.name, p->name, p->len))
- break;
- }
- if (p > last)
- goto out;
-
- error = proc_base_instantiate(dir, dentry, task, p);
-
-out:
- put_task_struct(task);
-out_no_task:
- return error;
-}
-
-static int proc_base_fill_cache(struct file *filp, void *dirent,
- filldir_t filldir, struct task_struct *task, const struct pid_entry *p)
-{
- return proc_fill_cache(filp, dirent, filldir, p->name, p->len,
- proc_base_instantiate, task, p);
-}
-
#ifdef CONFIG_TASK_IO_ACCOUNTING
static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
{
@@ -2839,10 +2699,6 @@ void proc_flush_task(struct task_struct *task)
proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
tgid->numbers[i].nr);
}
-
- upid = &pid->numbers[pid->level];
- if (upid->nr == 1)
- pid_ns_release_proc(upid->ns);
}
static struct dentry *proc_pid_instantiate(struct inode *dir,
@@ -2876,15 +2732,11 @@ out:
struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
{
- struct dentry *result;
+ struct dentry *result = NULL;
struct task_struct *task;
unsigned tgid;
struct pid_namespace *ns;
- result = proc_base_lookup(dir, dentry);
- if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT)
- goto out;
-
tgid = name_to_int(dentry);
if (tgid == ~0U)
goto out;
@@ -2947,7 +2799,7 @@ retry:
return iter;
}
-#define TGID_OFFSET (FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff))
+#define TGID_OFFSET (FIRST_PROCESS_ENTRY)
static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
struct tgid_iter iter)
@@ -2967,25 +2819,12 @@ static int fake_filldir(void *buf, const char *name, int namelen,
/* for the /proc/ directory itself, after non-process stuff has been done */
int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
{
- unsigned int nr;
- struct task_struct *reaper;
struct tgid_iter iter;
struct pid_namespace *ns;
filldir_t __filldir;
if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
- goto out_no_task;
- nr = filp->f_pos - FIRST_PROCESS_ENTRY;
-
- reaper = get_proc_task(filp->f_path.dentry->d_inode);
- if (!reaper)
- goto out_no_task;
-
- for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) {
- const struct pid_entry *p = &proc_base_stuff[nr];
- if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0)
- goto out;
- }
+ goto out;
ns = filp->f_dentry->d_sb->s_fs_info;
iter.task = NULL;
@@ -3006,8 +2845,6 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
}
filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET;
out:
- put_task_struct(reaper);
-out_no_task:
return 0;
}
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 0d80cef4cfb9..7b3ae3cc0ef9 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -350,14 +350,14 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
* Return an inode number between PROC_DYNAMIC_FIRST and
* 0xffffffff, or zero on failure.
*/
-static unsigned int get_inode_number(void)
+int proc_alloc_inum(unsigned int *inum)
{
unsigned int i;
int error;
retry:
- if (ida_pre_get(&proc_inum_ida, GFP_KERNEL) == 0)
- return 0;
+ if (!ida_pre_get(&proc_inum_ida, GFP_KERNEL))
+ return -ENOMEM;
spin_lock(&proc_inum_lock);
error = ida_get_new(&proc_inum_ida, &i);
@@ -365,18 +365,19 @@ retry:
if (error == -EAGAIN)
goto retry;
else if (error)
- return 0;
+ return error;
if (i > UINT_MAX - PROC_DYNAMIC_FIRST) {
spin_lock(&proc_inum_lock);
ida_remove(&proc_inum_ida, i);
spin_unlock(&proc_inum_lock);
- return 0;
+ return -ENOSPC;
}
- return PROC_DYNAMIC_FIRST + i;
+ *inum = PROC_DYNAMIC_FIRST + i;
+ return 0;
}
-static void release_inode_number(unsigned int inum)
+void proc_free_inum(unsigned int inum)
{
spin_lock(&proc_inum_lock);
ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
@@ -554,13 +555,12 @@ static const struct inode_operations proc_dir_inode_operations = {
static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp)
{
- unsigned int i;
struct proc_dir_entry *tmp;
+ int ret;
- i = get_inode_number();
- if (i == 0)
- return -EAGAIN;
- dp->low_ino = i;
+ ret = proc_alloc_inum(&dp->low_ino);
+ if (ret)
+ return ret;
if (S_ISDIR(dp->mode)) {
if (dp->proc_iops == NULL) {
@@ -764,7 +764,7 @@ EXPORT_SYMBOL(proc_create_data);
static void free_proc_entry(struct proc_dir_entry *de)
{
- release_inode_number(de->low_ino);
+ proc_free_inum(de->low_ino);
if (S_ISLNK(de->mode))
kfree(de->data);
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 3b22bbdee9ec..439ae6886507 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -31,6 +31,7 @@ static void proc_evict_inode(struct inode *inode)
struct proc_dir_entry *de;
struct ctl_table_header *head;
const struct proc_ns_operations *ns_ops;
+ void *ns;
truncate_inode_pages(&inode->i_data, 0);
clear_inode(inode);
@@ -49,8 +50,9 @@ static void proc_evict_inode(struct inode *inode)
}
/* Release any associated namespace */
ns_ops = PROC_I(inode)->ns_ops;
- if (ns_ops && ns_ops->put)
- ns_ops->put(PROC_I(inode)->ns);
+ ns = PROC_I(inode)->ns;
+ if (ns_ops && ns)
+ ns_ops->put(ns);
}
static struct kmem_cache * proc_inode_cachep;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 43973b084abf..252544c05207 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -15,6 +15,7 @@ struct ctl_table_header;
struct mempolicy;
extern struct proc_dir_entry proc_root;
+extern void proc_self_init(void);
#ifdef CONFIG_PROC_SYSCTL
extern int proc_sys_init(void);
extern void sysctl_head_put(struct ctl_table_header *head);
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index b178ed733c36..b7a47196c8c3 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -11,6 +11,7 @@
#include <net/net_namespace.h>
#include <linux/ipc_namespace.h>
#include <linux/pid_namespace.h>
+#include <linux/user_namespace.h>
#include "internal.h"
@@ -24,12 +25,168 @@ static const struct proc_ns_operations *ns_entries[] = {
#ifdef CONFIG_IPC_NS
&ipcns_operations,
#endif
+#ifdef CONFIG_PID_NS
+ &pidns_operations,
+#endif
+#ifdef CONFIG_USER_NS
+ &userns_operations,
+#endif
+ &mntns_operations,
};
static const struct file_operations ns_file_operations = {
.llseek = no_llseek,
};
+static const struct inode_operations ns_inode_operations = {
+ .setattr = proc_setattr,
+};
+
+static int ns_delete_dentry(const struct dentry *dentry)
+{
+ /* Don't cache namespace inodes when not in use */
+ return 1;
+}
+
+static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
+{
+ struct inode *inode = dentry->d_inode;
+ const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops;
+
+ return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]",
+ ns_ops->name, inode->i_ino);
+}
+
+const struct dentry_operations ns_dentry_operations =
+{
+ .d_delete = ns_delete_dentry,
+ .d_dname = ns_dname,
+};
+
+static struct dentry *proc_ns_get_dentry(struct super_block *sb,
+ struct task_struct *task, const struct proc_ns_operations *ns_ops)
+{
+ struct dentry *dentry, *result;
+ struct inode *inode;
+ struct proc_inode *ei;
+ struct qstr qname = { .name = "", };
+ void *ns;
+
+ ns = ns_ops->get(task);
+ if (!ns)
+ return ERR_PTR(-ENOENT);
+
+ dentry = d_alloc_pseudo(sb, &qname);
+ if (!dentry) {
+ ns_ops->put(ns);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ inode = iget_locked(sb, ns_ops->inum(ns));
+ if (!inode) {
+ dput(dentry);
+ ns_ops->put(ns);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ ei = PROC_I(inode);
+ if (inode->i_state & I_NEW) {
+ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+ inode->i_op = &ns_inode_operations;
+ inode->i_mode = S_IFREG | S_IRUGO;
+ inode->i_fop = &ns_file_operations;
+ ei->ns_ops = ns_ops;
+ ei->ns = ns;
+ unlock_new_inode(inode);
+ } else {
+ ns_ops->put(ns);
+ }
+
+ d_set_d_op(dentry, &ns_dentry_operations);
+ result = d_instantiate_unique(dentry, inode);
+ if (result) {
+ dput(dentry);
+ dentry = result;
+ }
+
+ return dentry;
+}
+
+static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+ struct inode *inode = dentry->d_inode;
+ struct super_block *sb = inode->i_sb;
+ struct proc_inode *ei = PROC_I(inode);
+ struct task_struct *task;
+ struct dentry *ns_dentry;
+ void *error = ERR_PTR(-EACCES);
+
+ task = get_proc_task(inode);
+ if (!task)
+ goto out;
+
+ if (!ptrace_may_access(task, PTRACE_MODE_READ))
+ goto out_put_task;
+
+ ns_dentry = proc_ns_get_dentry(sb, task, ei->ns_ops);
+ if (IS_ERR(ns_dentry)) {
+ error = ERR_CAST(ns_dentry);
+ goto out_put_task;
+ }
+
+ dput(nd->path.dentry);
+ nd->path.dentry = ns_dentry;
+ error = NULL;
+
+out_put_task:
+ put_task_struct(task);
+out:
+ return error;
+}
+
+static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen)
+{
+ struct inode *inode = dentry->d_inode;
+ struct proc_inode *ei = PROC_I(inode);
+ const struct proc_ns_operations *ns_ops = ei->ns_ops;
+ struct task_struct *task;
+ void *ns;
+ char name[50];
+ int len = -EACCES;
+
+ task = get_proc_task(inode);
+ if (!task)
+ goto out;
+
+ if (!ptrace_may_access(task, PTRACE_MODE_READ))
+ goto out_put_task;
+
+ len = -ENOENT;
+ ns = ns_ops->get(task);
+ if (!ns)
+ goto out_put_task;
+
+ snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns_ops->inum(ns));
+ len = strlen(name);
+
+ if (len > buflen)
+ len = buflen;
+ if (copy_to_user(buffer, name, len))
+ len = -EFAULT;
+
+ ns_ops->put(ns);
+out_put_task:
+ put_task_struct(task);
+out:
+ return len;
+}
+
+static const struct inode_operations proc_ns_link_inode_operations = {
+ .readlink = proc_ns_readlink,
+ .follow_link = proc_ns_follow_link,
+ .setattr = proc_setattr,
+};
+
static struct dentry *proc_ns_instantiate(struct inode *dir,
struct dentry *dentry, struct task_struct *task, const void *ptr)
{
@@ -37,21 +194,15 @@ static struct dentry *proc_ns_instantiate(struct inode *dir,
struct inode *inode;
struct proc_inode *ei;
struct dentry *error = ERR_PTR(-ENOENT);
- void *ns;
inode = proc_pid_make_inode(dir->i_sb, task);
if (!inode)
goto out;
- ns = ns_ops->get(task);
- if (!ns)
- goto out_iput;
-
ei = PROC_I(inode);
- inode->i_mode = S_IFREG|S_IRUSR;
- inode->i_fop = &ns_file_operations;
- ei->ns_ops = ns_ops;
- ei->ns = ns;
+ inode->i_mode = S_IFLNK|S_IRWXUGO;
+ inode->i_op = &proc_ns_link_inode_operations;
+ ei->ns_ops = ns_ops;
d_set_d_op(dentry, &pid_dentry_operations);
d_add(dentry, inode);
@@ -60,9 +211,6 @@ static struct dentry *proc_ns_instantiate(struct inode *dir,
error = NULL;
out:
return error;
-out_iput:
- iput(inode);
- goto out;
}
static int proc_ns_fill_cache(struct file *filp, void *dirent,
@@ -89,10 +237,6 @@ static int proc_ns_dir_readdir(struct file *filp, void *dirent,
if (!task)
goto out_no_task;
- ret = -EPERM;
- if (!ptrace_may_access(task, PTRACE_MODE_READ))
- goto out;
-
ret = 0;
i = filp->f_pos;
switch (i) {
@@ -152,10 +296,6 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir,
if (!task)
goto out_no_task;
- error = ERR_PTR(-EPERM);
- if (!ptrace_may_access(task, PTRACE_MODE_READ))
- goto out;
-
last = &ns_entries[ARRAY_SIZE(ns_entries)];
for (entry = ns_entries; entry < last; entry++) {
if (strlen((*entry)->name) != len)
@@ -163,7 +303,6 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir,
if (!memcmp(dentry->d_name.name, (*entry)->name, len))
break;
}
- error = ERR_PTR(-ENOENT);
if (entry == last)
goto out;
@@ -198,3 +337,7 @@ out_invalid:
return ERR_PTR(-EINVAL);
}
+bool proc_ns_inode(struct inode *inode)
+{
+ return inode->i_fop == &ns_file_operations;
+}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 9889a92d2e01..c6e9fac26bac 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -100,14 +100,13 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
int err;
struct super_block *sb;
struct pid_namespace *ns;
- struct proc_inode *ei;
char *options;
if (flags & MS_KERNMOUNT) {
ns = (struct pid_namespace *)data;
options = NULL;
} else {
- ns = current->nsproxy->pid_ns;
+ ns = task_active_pid_ns(current);
options = data;
}
@@ -130,13 +129,6 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
sb->s_flags |= MS_ACTIVE;
}
- ei = PROC_I(sb->s_root->d_inode);
- if (!ei->pid) {
- rcu_read_lock();
- ei->pid = get_pid(find_pid_ns(1, ns));
- rcu_read_unlock();
- }
-
return dget(sb->s_root);
}
@@ -153,6 +145,7 @@ static struct file_system_type proc_fs_type = {
.name = "proc",
.mount = proc_mount,
.kill_sb = proc_kill_sb,
+ .fs_flags = FS_USERNS_MOUNT,
};
void __init proc_root_init(void)
@@ -163,12 +156,8 @@ void __init proc_root_init(void)
err = register_filesystem(&proc_fs_type);
if (err)
return;
- err = pid_ns_prepare_proc(&init_pid_ns);
- if (err) {
- unregister_filesystem(&proc_fs_type);
- return;
- }
+ proc_self_init();
proc_symlink("mounts", NULL, "self/mounts");
proc_net_init();
diff --git a/fs/proc/self.c b/fs/proc/self.c
new file mode 100644
index 000000000000..aa5cc3bff140
--- /dev/null
+++ b/fs/proc/self.c
@@ -0,0 +1,59 @@
+#include <linux/proc_fs.h>
+#include <linux/sched.h>
+#include <linux/namei.h>
+
+/*
+ * /proc/self:
+ */
+static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
+ int buflen)
+{
+ struct pid_namespace *ns = dentry->d_sb->s_fs_info;
+ pid_t tgid = task_tgid_nr_ns(current, ns);
+ char tmp[PROC_NUMBUF];
+ if (!tgid)
+ return -ENOENT;
+ sprintf(tmp, "%d", tgid);
+ return vfs_readlink(dentry,buffer,buflen,tmp);
+}
+
+static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+ struct pid_namespace *ns = dentry->d_sb->s_fs_info;
+ pid_t tgid = task_tgid_nr_ns(current, ns);
+ char *name = ERR_PTR(-ENOENT);
+ if (tgid) {
+ /* 11 for max length of signed int in decimal + NULL term */
+ name = kmalloc(12, GFP_KERNEL);
+ if (!name)
+ name = ERR_PTR(-ENOMEM);
+ else
+ sprintf(name, "%d", tgid);
+ }
+ nd_set_link(nd, name);
+ return NULL;
+}
+
+static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd,
+ void *cookie)
+{
+ char *s = nd_get_link(nd);
+ if (!IS_ERR(s))
+ kfree(s);
+}
+
+static const struct inode_operations proc_self_inode_operations = {
+ .readlink = proc_self_readlink,
+ .follow_link = proc_self_follow_link,
+ .put_link = proc_self_put_link,
+};
+
+void __init proc_self_init(void)
+{
+ struct proc_dir_entry *proc_self_symlink;
+ mode_t mode;
+
+ mode = S_IFLNK | S_IRWXUGO;
+ proc_self_symlink = proc_create("self", mode, NULL, NULL );
+ proc_self_symlink->proc_iops = &proc_self_inode_operations;
+}
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 71eb7e253927..db940a9be045 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -149,6 +149,7 @@ static struct file_system_type sysfs_fs_type = {
.name = "sysfs",
.mount = sysfs_mount,
.kill_sb = sysfs_kill_sb,
+ .fs_flags = FS_USERNS_MOUNT,
};
int __init sysfs_init(void)