diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Makefile | 2 | ||||
-rw-r--r-- | kernel/cgroup.c | 24 | ||||
-rw-r--r-- | kernel/fork.c | 5 | ||||
-rw-r--r-- | kernel/pid_namespace.c | 50 | ||||
-rw-r--r-- | kernel/sysctl.c | 9 | ||||
-rw-r--r-- | kernel/ucount.c | 235 | ||||
-rw-r--r-- | kernel/user_namespace.c | 99 | ||||
-rw-r--r-- | kernel/utsname.c | 40 |
8 files changed, 440 insertions, 24 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index e2ec54e2b952..eb26e12c6c2a 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -9,7 +9,7 @@ obj-y = fork.o exec_domain.o panic.o \ extable.o params.o \ kthread.o sys_ni.o nsproxy.o \ notifier.o ksysfs.o cred.o reboot.o \ - async.o range.o smpboot.o + async.o range.o smpboot.o ucount.o obj-$(CONFIG_MULTIUSER) += groups.o diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 9ba28310eab6..44066158f0d1 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -6328,6 +6328,16 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd) /* cgroup namespaces */ +static struct ucounts *inc_cgroup_namespaces(struct user_namespace *ns) +{ + return inc_ucount(ns, current_euid(), UCOUNT_CGROUP_NAMESPACES); +} + +static void dec_cgroup_namespaces(struct ucounts *ucounts) +{ + dec_ucount(ucounts, UCOUNT_CGROUP_NAMESPACES); +} + static struct cgroup_namespace *alloc_cgroup_ns(void) { struct cgroup_namespace *new_ns; @@ -6349,6 +6359,7 @@ static struct cgroup_namespace *alloc_cgroup_ns(void) void free_cgroup_ns(struct cgroup_namespace *ns) { put_css_set(ns->root_cset); + dec_cgroup_namespaces(ns->ucounts); put_user_ns(ns->user_ns); ns_free_inum(&ns->ns); kfree(ns); @@ -6360,6 +6371,7 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, struct cgroup_namespace *old_ns) { struct cgroup_namespace *new_ns; + struct ucounts *ucounts; struct css_set *cset; BUG_ON(!old_ns); @@ -6373,6 +6385,10 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, if (!ns_capable(user_ns, CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); + ucounts = inc_cgroup_namespaces(user_ns); + if (!ucounts) + return ERR_PTR(-ENOSPC); + /* It is not safe to take cgroup_mutex here */ spin_lock_irq(&css_set_lock); cset = task_css_set(current); @@ -6382,10 +6398,12 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, new_ns = alloc_cgroup_ns(); if (IS_ERR(new_ns)) { put_css_set(cset); + dec_cgroup_namespaces(ucounts); return new_ns; } new_ns->user_ns = get_user_ns(user_ns); + new_ns->ucounts = ucounts; new_ns->root_cset = cset; return new_ns; @@ -6436,12 +6454,18 @@ static void cgroupns_put(struct ns_common *ns) put_cgroup_ns(to_cg_ns(ns)); } +static struct user_namespace *cgroupns_owner(struct ns_common *ns) +{ + return to_cg_ns(ns)->user_ns; +} + const struct proc_ns_operations cgroupns_operations = { .name = "cgroup", .type = CLONE_NEWCGROUP, .get = cgroupns_get, .put = cgroupns_put, .install = cgroupns_install, + .owner = cgroupns_owner, }; static __init int cgroup_namespaces_init(void) diff --git a/kernel/fork.c b/kernel/fork.c index c060c7e7c247..9a05bd93f8e7 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -418,6 +418,7 @@ int arch_task_struct_size __read_mostly; void __init fork_init(void) { + int i; #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR #ifndef ARCH_MIN_TASKALIGN #define ARCH_MIN_TASKALIGN L1_CACHE_BYTES @@ -437,6 +438,10 @@ void __init fork_init(void) init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2; init_task.signal->rlim[RLIMIT_SIGPENDING] = init_task.signal->rlim[RLIMIT_NPROC]; + + for (i = 0; i < UCOUNT_COUNTS; i++) { + init_user_ns.ucount_max[i] = max_threads/2; + } } int __weak arch_dup_task_struct(struct task_struct *dst, diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index a65ba137fd15..df9e8e9e0be7 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -79,23 +79,36 @@ static void proc_cleanup_work(struct work_struct *work) /* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */ #define MAX_PID_NS_LEVEL 32 +static struct ucounts *inc_pid_namespaces(struct user_namespace *ns) +{ + return inc_ucount(ns, current_euid(), UCOUNT_PID_NAMESPACES); +} + +static void dec_pid_namespaces(struct ucounts *ucounts) +{ + dec_ucount(ucounts, UCOUNT_PID_NAMESPACES); +} + static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns, struct pid_namespace *parent_pid_ns) { struct pid_namespace *ns; unsigned int level = parent_pid_ns->level + 1; + struct ucounts *ucounts; int i; int err; - if (level > MAX_PID_NS_LEVEL) { - err = -EINVAL; + err = -ENOSPC; + if (level > MAX_PID_NS_LEVEL) + goto out; + ucounts = inc_pid_namespaces(user_ns); + if (!ucounts) goto out; - } err = -ENOMEM; ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL); if (ns == NULL) - goto out; + goto out_dec; ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL); if (!ns->pidmap[0].page) @@ -114,6 +127,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns ns->level = level; ns->parent = get_pid_ns(parent_pid_ns); ns->user_ns = get_user_ns(user_ns); + ns->ucounts = ucounts; ns->nr_hashed = PIDNS_HASH_ADDING; INIT_WORK(&ns->proc_work, proc_cleanup_work); @@ -129,6 +143,8 @@ out_free_map: kfree(ns->pidmap[0].page); out_free: kmem_cache_free(pid_ns_cachep, ns); +out_dec: + dec_pid_namespaces(ucounts); out: return ERR_PTR(err); } @@ -146,6 +162,7 @@ static void destroy_pid_namespace(struct pid_namespace *ns) ns_free_inum(&ns->ns); for (i = 0; i < PIDMAP_ENTRIES; i++) kfree(ns->pidmap[i].page); + dec_pid_namespaces(ns->ucounts); put_user_ns(ns->user_ns); call_rcu(&ns->rcu, delayed_free_pidns); } @@ -388,12 +405,37 @@ static int pidns_install(struct nsproxy *nsproxy, struct ns_common *ns) return 0; } +static struct ns_common *pidns_get_parent(struct ns_common *ns) +{ + struct pid_namespace *active = task_active_pid_ns(current); + struct pid_namespace *pid_ns, *p; + + /* See if the parent is in the current namespace */ + pid_ns = p = to_pid_ns(ns)->parent; + for (;;) { + if (!p) + return ERR_PTR(-EPERM); + if (p == active) + break; + p = p->parent; + } + + return &get_pid_ns(pid_ns)->ns; +} + +static struct user_namespace *pidns_owner(struct ns_common *ns) +{ + return to_pid_ns(ns)->user_ns; +} + const struct proc_ns_operations pidns_operations = { .name = "pid", .type = CLONE_NEWPID, .get = pidns_get, .put = pidns_put, .install = pidns_install, + .owner = pidns_owner, + .get_parent = pidns_get_parent, }; static __init int pid_namespaces_init(void) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index a13bbdaab47d..a43775c6646c 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -65,6 +65,7 @@ #include <linux/sched/sysctl.h> #include <linux/kexec.h> #include <linux/bpf.h> +#include <linux/mount.h> #include <asm/uaccess.h> #include <asm/processor.h> @@ -1838,6 +1839,14 @@ static struct ctl_table fs_table[] = { .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, + { + .procname = "mount-max", + .data = &sysctl_mount_max, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + }, { } }; diff --git a/kernel/ucount.c b/kernel/ucount.c new file mode 100644 index 000000000000..9d20d5dd298a --- /dev/null +++ b/kernel/ucount.c @@ -0,0 +1,235 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. + */ + +#include <linux/stat.h> +#include <linux/sysctl.h> +#include <linux/slab.h> +#include <linux/hash.h> +#include <linux/user_namespace.h> + +#define UCOUNTS_HASHTABLE_BITS 10 +static struct hlist_head ucounts_hashtable[(1 << UCOUNTS_HASHTABLE_BITS)]; +static DEFINE_SPINLOCK(ucounts_lock); + +#define ucounts_hashfn(ns, uid) \ + hash_long((unsigned long)__kuid_val(uid) + (unsigned long)(ns), \ + UCOUNTS_HASHTABLE_BITS) +#define ucounts_hashentry(ns, uid) \ + (ucounts_hashtable + ucounts_hashfn(ns, uid)) + + +#ifdef CONFIG_SYSCTL +static struct ctl_table_set * +set_lookup(struct ctl_table_root *root) +{ + return ¤t_user_ns()->set; +} + +static int set_is_seen(struct ctl_table_set *set) +{ + return ¤t_user_ns()->set == set; +} + +static int set_permissions(struct ctl_table_header *head, + struct ctl_table *table) +{ + struct user_namespace *user_ns = + container_of(head->set, struct user_namespace, set); + int mode; + + /* Allow users with CAP_SYS_RESOURCE unrestrained access */ + if (ns_capable(user_ns, CAP_SYS_RESOURCE)) + mode = (table->mode & S_IRWXU) >> 6; + else + /* Allow all others at most read-only access */ + mode = table->mode & S_IROTH; + return (mode << 6) | (mode << 3) | mode; +} + +static struct ctl_table_root set_root = { + .lookup = set_lookup, + .permissions = set_permissions, +}; + +static int zero = 0; +static int int_max = INT_MAX; +#define UCOUNT_ENTRY(name) \ + { \ + .procname = name, \ + .maxlen = sizeof(int), \ + .mode = 0644, \ + .proc_handler = proc_dointvec_minmax, \ + .extra1 = &zero, \ + .extra2 = &int_max, \ + } +static struct ctl_table user_table[] = { + UCOUNT_ENTRY("max_user_namespaces"), + UCOUNT_ENTRY("max_pid_namespaces"), + UCOUNT_ENTRY("max_uts_namespaces"), + UCOUNT_ENTRY("max_ipc_namespaces"), + UCOUNT_ENTRY("max_net_namespaces"), + UCOUNT_ENTRY("max_mnt_namespaces"), + UCOUNT_ENTRY("max_cgroup_namespaces"), + { } +}; +#endif /* CONFIG_SYSCTL */ + +bool setup_userns_sysctls(struct user_namespace *ns) +{ +#ifdef CONFIG_SYSCTL + struct ctl_table *tbl; + setup_sysctl_set(&ns->set, &set_root, set_is_seen); + tbl = kmemdup(user_table, sizeof(user_table), GFP_KERNEL); + if (tbl) { + int i; + for (i = 0; i < UCOUNT_COUNTS; i++) { + tbl[i].data = &ns->ucount_max[i]; + } + ns->sysctls = __register_sysctl_table(&ns->set, "user", tbl); + } + if (!ns->sysctls) { + kfree(tbl); + retire_sysctl_set(&ns->set); + return false; + } +#endif + return true; +} + +void retire_userns_sysctls(struct user_namespace *ns) +{ +#ifdef CONFIG_SYSCTL + struct ctl_table *tbl; + + tbl = ns->sysctls->ctl_table_arg; + unregister_sysctl_table(ns->sysctls); + retire_sysctl_set(&ns->set); + kfree(tbl); +#endif +} + +static struct ucounts *find_ucounts(struct user_namespace *ns, kuid_t uid, struct hlist_head *hashent) +{ + struct ucounts *ucounts; + + hlist_for_each_entry(ucounts, hashent, node) { + if (uid_eq(ucounts->uid, uid) && (ucounts->ns == ns)) + return ucounts; + } + return NULL; +} + +static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid) +{ + struct hlist_head *hashent = ucounts_hashentry(ns, uid); + struct ucounts *ucounts, *new; + + spin_lock(&ucounts_lock); + ucounts = find_ucounts(ns, uid, hashent); + if (!ucounts) { + spin_unlock(&ucounts_lock); + + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return NULL; + + new->ns = ns; + new->uid = uid; + atomic_set(&new->count, 0); + + spin_lock(&ucounts_lock); + ucounts = find_ucounts(ns, uid, hashent); + if (ucounts) { + kfree(new); + } else { + hlist_add_head(&new->node, hashent); + ucounts = new; + } + } + if (!atomic_add_unless(&ucounts->count, 1, INT_MAX)) + ucounts = NULL; + spin_unlock(&ucounts_lock); + return ucounts; +} + +static void put_ucounts(struct ucounts *ucounts) +{ + if (atomic_dec_and_test(&ucounts->count)) { + spin_lock(&ucounts_lock); + hlist_del_init(&ucounts->node); + spin_unlock(&ucounts_lock); + + kfree(ucounts); + } +} + +static inline bool atomic_inc_below(atomic_t *v, int u) +{ + int c, old; + c = atomic_read(v); + for (;;) { + if (unlikely(c >= u)) + return false; + old = atomic_cmpxchg(v, c, c+1); + if (likely(old == c)) + return true; + c = old; + } +} + +struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, + enum ucount_type type) +{ + struct ucounts *ucounts, *iter, *bad; + struct user_namespace *tns; + ucounts = get_ucounts(ns, uid); + for (iter = ucounts; iter; iter = tns->ucounts) { + int max; + tns = iter->ns; + max = READ_ONCE(tns->ucount_max[type]); + if (!atomic_inc_below(&iter->ucount[type], max)) + goto fail; + } + return ucounts; +fail: + bad = iter; + for (iter = ucounts; iter != bad; iter = iter->ns->ucounts) + atomic_dec(&iter->ucount[type]); + + put_ucounts(ucounts); + return NULL; +} + +void dec_ucount(struct ucounts *ucounts, enum ucount_type type) +{ + struct ucounts *iter; + for (iter = ucounts; iter; iter = iter->ns->ucounts) { + int dec = atomic_dec_if_positive(&iter->ucount[type]); + WARN_ON_ONCE(dec < 0); + } + put_ucounts(ucounts); +} + +static __init int user_namespace_sysctl_init(void) +{ +#ifdef CONFIG_SYSCTL + static struct ctl_table_header *user_header; + static struct ctl_table empty[1]; + /* + * It is necessary to register the user directory in the + * default set so that registrations in the child sets work + * properly. + */ + user_header = register_sysctl("user", empty); + BUG_ON(!user_header); + BUG_ON(!setup_userns_sysctls(&init_user_ns)); +#endif + return 0; +} +subsys_initcall(user_namespace_sysctl_init); + + diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 68f594212759..86b7854fec8e 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -29,6 +29,17 @@ static DEFINE_MUTEX(userns_state_mutex); static bool new_idmap_permitted(const struct file *file, struct user_namespace *ns, int cap_setid, struct uid_gid_map *map); +static void free_user_ns(struct work_struct *work); + +static struct ucounts *inc_user_namespaces(struct user_namespace *ns, kuid_t uid) +{ + return inc_ucount(ns, uid, UCOUNT_USER_NAMESPACES); +} + +static void dec_user_namespaces(struct ucounts *ucounts) +{ + return dec_ucount(ucounts, UCOUNT_USER_NAMESPACES); +} static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) { @@ -62,10 +73,16 @@ int create_user_ns(struct cred *new) struct user_namespace *ns, *parent_ns = new->user_ns; kuid_t owner = new->euid; kgid_t group = new->egid; - int ret; + struct ucounts *ucounts; + int ret, i; + ret = -ENOSPC; if (parent_ns->level > 32) - return -EUSERS; + goto fail; + + ucounts = inc_user_namespaces(parent_ns, owner); + if (!ucounts) + goto fail; /* * Verify that we can not violate the policy of which files @@ -73,26 +90,27 @@ int create_user_ns(struct cred *new) * by verifing that the root directory is at the root of the * mount namespace which allows all files to be accessed. */ + ret = -EPERM; if (current_chrooted()) - return -EPERM; + goto fail_dec; /* The creator needs a mapping in the parent user namespace * or else we won't be able to reasonably tell userspace who * created a user_namespace. */ + ret = -EPERM; if (!kuid_has_mapping(parent_ns, owner) || !kgid_has_mapping(parent_ns, group)) - return -EPERM; + goto fail_dec; + ret = -ENOMEM; ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL); if (!ns) - return -ENOMEM; + goto fail_dec; ret = ns_alloc_inum(&ns->ns); - if (ret) { - kmem_cache_free(user_ns_cachep, ns); - return ret; - } + if (ret) + goto fail_free; ns->ns.ops = &userns_operations; atomic_set(&ns->count, 1); @@ -101,18 +119,37 @@ int create_user_ns(struct cred *new) ns->level = parent_ns->level + 1; ns->owner = owner; ns->group = group; + INIT_WORK(&ns->work, free_user_ns); + for (i = 0; i < UCOUNT_COUNTS; i++) { + ns->ucount_max[i] = INT_MAX; + } + ns->ucounts = ucounts; /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */ mutex_lock(&userns_state_mutex); ns->flags = parent_ns->flags; mutex_unlock(&userns_state_mutex); - set_cred_user_ns(new, ns); - #ifdef CONFIG_PERSISTENT_KEYRINGS init_rwsem(&ns->persistent_keyring_register_sem); #endif + ret = -ENOMEM; + if (!setup_userns_sysctls(ns)) + goto fail_keyring; + + set_cred_user_ns(new, ns); return 0; +fail_keyring: +#ifdef CONFIG_PERSISTENT_KEYRINGS + key_put(ns->persistent_keyring_register); +#endif + ns_free_inum(&ns->ns); +fail_free: + kmem_cache_free(user_ns_cachep, ns); +fail_dec: + dec_user_namespaces(ucounts); +fail: + return ret; } int unshare_userns(unsigned long unshare_flags, struct cred **new_cred) @@ -135,21 +172,30 @@ int unshare_userns(unsigned long unshare_flags, struct cred **new_cred) return err; } -void free_user_ns(struct user_namespace *ns) +static void free_user_ns(struct work_struct *work) { - struct user_namespace *parent; + struct user_namespace *parent, *ns = + container_of(work, struct user_namespace, work); do { + struct ucounts *ucounts = ns->ucounts; parent = ns->parent; + retire_userns_sysctls(ns); #ifdef CONFIG_PERSISTENT_KEYRINGS key_put(ns->persistent_keyring_register); #endif ns_free_inum(&ns->ns); kmem_cache_free(user_ns_cachep, ns); + dec_user_namespaces(ucounts); ns = parent; } while (atomic_dec_and_test(&parent->count)); } -EXPORT_SYMBOL(free_user_ns); + +void __put_user_ns(struct user_namespace *ns) +{ + schedule_work(&ns->work); +} +EXPORT_SYMBOL(__put_user_ns); static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count) { @@ -1004,12 +1050,37 @@ static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns) return commit_creds(cred); } +struct ns_common *ns_get_owner(struct ns_common *ns) +{ + struct user_namespace *my_user_ns = current_user_ns(); + struct user_namespace *owner, *p; + + /* See if the owner is in the current user namespace */ + owner = p = ns->ops->owner(ns); + for (;;) { + if (!p) + return ERR_PTR(-EPERM); + if (p == my_user_ns) + break; + p = p->parent; + } + + return &get_user_ns(owner)->ns; +} + +static struct user_namespace *userns_owner(struct ns_common *ns) +{ + return to_user_ns(ns)->parent; +} + const struct proc_ns_operations userns_operations = { .name = "user", .type = CLONE_NEWUSER, .get = userns_get, .put = userns_put, .install = userns_install, + .owner = userns_owner, + .get_parent = ns_get_owner, }; static __init int user_namespaces_init(void) diff --git a/kernel/utsname.c b/kernel/utsname.c index 831ea7108232..6976cd47dcf6 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -17,6 +17,16 @@ #include <linux/user_namespace.h> #include <linux/proc_ns.h> +static struct ucounts *inc_uts_namespaces(struct user_namespace *ns) +{ + return inc_ucount(ns, current_euid(), UCOUNT_UTS_NAMESPACES); +} + +static void dec_uts_namespaces(struct ucounts *ucounts) +{ + dec_ucount(ucounts, UCOUNT_UTS_NAMESPACES); +} + static struct uts_namespace *create_uts_ns(void) { struct uts_namespace *uts_ns; @@ -36,18 +46,24 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns, struct uts_namespace *old_ns) { struct uts_namespace *ns; + struct ucounts *ucounts; int err; + err = -ENOSPC; + ucounts = inc_uts_namespaces(user_ns); + if (!ucounts) + goto fail; + + err = -ENOMEM; ns = create_uts_ns(); if (!ns) - return ERR_PTR(-ENOMEM); + goto fail_dec; err = ns_alloc_inum(&ns->ns); - if (err) { - kfree(ns); - return ERR_PTR(err); - } + if (err) + goto fail_free; + ns->ucounts = ucounts; ns->ns.ops = &utsns_operations; down_read(&uts_sem); @@ -55,6 +71,13 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns, ns->user_ns = get_user_ns(user_ns); up_read(&uts_sem); return ns; + +fail_free: + kfree(ns); +fail_dec: + dec_uts_namespaces(ucounts); +fail: + return ERR_PTR(err); } /* @@ -85,6 +108,7 @@ void free_uts_ns(struct kref *kref) struct uts_namespace *ns; ns = container_of(kref, struct uts_namespace, kref); + dec_uts_namespaces(ns->ucounts); put_user_ns(ns->user_ns); ns_free_inum(&ns->ns); kfree(ns); @@ -130,10 +154,16 @@ static int utsns_install(struct nsproxy *nsproxy, struct ns_common *new) return 0; } +static struct user_namespace *utsns_owner(struct ns_common *ns) +{ + return to_uts_ns(ns)->user_ns; +} + const struct proc_ns_operations utsns_operations = { .name = "uts", .type = CLONE_NEWUTS, .get = utsns_get, .put = utsns_put, .install = utsns_install, + .owner = utsns_owner, }; |