diff options
author | Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl> | 2018-11-30 10:09:00 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-11-30 10:09:00 +0100 |
commit | b2ac2b01c8ddaabbe63bcb7168dd752f44320d86 (patch) | |
tree | 91b74d45e81f52ee09019fa33e92ab926eac2f5d /src/core | |
parent | docs: turn LGTM URL into a markdown link (diff) | |
parent | update TODO (diff) | |
download | systemd-b2ac2b01c8ddaabbe63bcb7168dd752f44320d86.tar.xz systemd-b2ac2b01c8ddaabbe63bcb7168dd752f44320d86.zip |
Merge pull request #10996 from poettering/oci-prep
Preparation for the nspawn-OCI work
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/bpf-devices.c | 26 | ||||
-rw-r--r-- | src/core/bpf-devices.h | 1 | ||||
-rw-r--r-- | src/core/cgroup.c | 104 | ||||
-rw-r--r-- | src/core/mount-setup.c | 17 |
4 files changed, 105 insertions, 43 deletions
diff --git a/src/core/bpf-devices.c b/src/core/bpf-devices.c index d8915244a7..dade7f0490 100644 --- a/src/core/bpf-devices.c +++ b/src/core/bpf-devices.c @@ -84,6 +84,32 @@ int cgroup_bpf_whitelist_major(BPFProgram *prog, int type, int major, const char return r; } +int cgroup_bpf_whitelist_class(BPFProgram *prog, int type, const char *acc) { + struct bpf_insn insn[] = { + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, type, 5), /* compare device type */ + BPF_MOV32_REG(BPF_REG_1, BPF_REG_3), /* calculate access type */ + BPF_ALU32_IMM(BPF_AND, BPF_REG_1, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 1), /* compare access type */ + BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */ + }; + int r, access; + + assert(prog); + assert(acc); + + access = bpf_access_type(acc); + if (access <= 0) + return -EINVAL; + + insn[2].imm = access; + + r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn)); + if (r < 0) + log_error_errno(r, "Extending device control BPF program failed: %m"); + + return r; +} + int cgroup_init_device_bpf(BPFProgram **ret, CGroupDevicePolicy policy, bool whitelist) { struct bpf_insn pre_insn[] = { /* load device type to r2 */ diff --git a/src/core/bpf-devices.h b/src/core/bpf-devices.h index f9a6eec028..8d3de3bd94 100644 --- a/src/core/bpf-devices.h +++ b/src/core/bpf-devices.h @@ -11,6 +11,7 @@ int bpf_devices_supported(void); int cgroup_bpf_whitelist_device(BPFProgram *p, int type, int major, int minor, const char *acc); int cgroup_bpf_whitelist_major(BPFProgram *p, int type, int major, const char *acc); +int cgroup_bpf_whitelist_class(BPFProgram *prog, int type, const char *acc); int cgroup_init_device_bpf(BPFProgram **ret, CGroupDevicePolicy policy, bool whitelist); int cgroup_apply_device_bpf(Unit *u, BPFProgram *p, CGroupDevicePolicy policy, bool whitelist); diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 598b396186..11f9611b71 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -19,6 +19,7 @@ #include "process-util.h" #include "procfs-util.h" #include "special.h" +#include "stat-util.h" #include "stdio-util.h" #include "string-table.h" #include "string-util.h" @@ -375,16 +376,23 @@ int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode) } static int lookup_block_device(const char *p, dev_t *ret) { - struct stat st; + struct stat st = {}; int r; assert(p); assert(ret); - if (stat(p, &st) < 0) - return log_warning_errno(errno, "Couldn't stat device '%s': %m", p); - - if (S_ISBLK(st.st_mode)) + r = device_path_parse_major_minor(p, &st.st_mode, &st.st_rdev); + if (r == -ENODEV) { /* not a parsable device node, need to go to disk */ + if (stat(p, &st) < 0) + return log_warning_errno(errno, "Couldn't stat device '%s': %m", p); + } else if (r < 0) + return log_warning_errno(r, "Failed to parse major/minor from path '%s': %m", p); + + if (S_ISCHR(st.st_mode)) { + log_warning("Device node '%s' is a character device, but block device needed.", p); + return -ENOTBLK; + } else if (S_ISBLK(st.st_mode)) *ret = st.st_rdev; else if (major(st.st_dev) != 0) *ret = st.st_dev; /* If this is not a device node then use the block device this file is stored on */ @@ -408,30 +416,27 @@ static int lookup_block_device(const char *p, dev_t *ret) { } static int whitelist_device(BPFProgram *prog, const char *path, const char *node, const char *acc) { - struct stat st; - bool ignore_notfound; + struct stat st = {}; int r; assert(path); assert(acc); - if (node[0] == '-') { - /* Non-existent paths starting with "-" must be silently ignored */ - node++; - ignore_notfound = true; - } else - ignore_notfound = false; - - if (stat(node, &st) < 0) { - if (errno == ENOENT && ignore_notfound) - return 0; + /* Some special handling for /dev/block/%u:%u, /dev/char/%u:%u, /run/systemd/inaccessible/chr and + * /run/systemd/inaccessible/blk paths. Instead of stat()ing these we parse out the major/minor directly. This + * means clients can use these path without the device node actually around */ + r = device_path_parse_major_minor(node, &st.st_mode, &st.st_rdev); + if (r < 0) { + if (r != -ENODEV) + return log_warning_errno(r, "Couldn't parse major/minor from device path '%s': %m", node); - return log_warning_errno(errno, "Couldn't stat device %s: %m", node); - } + if (stat(node, &st) < 0) + return log_warning_errno(errno, "Couldn't stat device %s: %m", node); - if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) { - log_warning("%s is not a device.", node); - return -ENODEV; + if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) { + log_warning("%s is not a device.", node); + return -ENODEV; + } } if (cg_all_unified() > 0) { @@ -463,21 +468,64 @@ static int whitelist_device(BPFProgram *prog, const char *path, const char *node static int whitelist_major(BPFProgram *prog, const char *path, const char *name, char type, const char *acc) { _cleanup_fclose_ FILE *f = NULL; - char *p, *w; + char buf[2+DECIMAL_STR_MAX(unsigned)+3+4]; bool good = false; + unsigned maj; int r; assert(path); assert(acc); assert(IN_SET(type, 'b', 'c')); + if (streq(name, "*")) { + /* If the name is a wildcard, then apply this list to all devices of this type */ + + if (cg_all_unified() > 0) { + if (!prog) + return 0; + + (void) cgroup_bpf_whitelist_class(prog, type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK, acc); + } else { + xsprintf(buf, "%c *:* %s", type, acc); + + r = cg_set_attribute("devices", path, "devices.allow", buf); + if (r < 0) + log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set devices.allow on %s: %m", path); + return 0; + } + } + + if (safe_atou(name, &maj) >= 0 && DEVICE_MAJOR_VALID(maj)) { + /* The name is numeric and suitable as major. In that case, let's take is major, and create the entry + * directly */ + + if (cg_all_unified() > 0) { + if (!prog) + return 0; + + (void) cgroup_bpf_whitelist_major(prog, + type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK, + maj, acc); + } else { + xsprintf(buf, "%c %u:* %s", type, maj, acc); + + r = cg_set_attribute("devices", path, "devices.allow", buf); + if (r < 0) + log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set devices.allow on %s: %m", path); + } + + return 0; + } + f = fopen("/proc/devices", "re"); if (!f) return log_warning_errno(errno, "Cannot open /proc/devices to resolve %s (%c): %m", name, type); for (;;) { _cleanup_free_ char *line = NULL; - unsigned maj; + char *w, *p; r = read_line(f, LONG_LINE_MAX, &line); if (r < 0) @@ -530,8 +578,6 @@ static int whitelist_major(BPFProgram *prog, const char *path, const char *name, type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK, maj, acc); } else { - char buf[2+DECIMAL_STR_MAX(unsigned)+3+4]; - sprintf(buf, "%c %u:* %s", type, @@ -1098,8 +1144,8 @@ static void cgroup_context_apply( "/dev/tty\0" "rwm\0" "/dev/ptmx\0" "rwm\0" /* Allow /run/systemd/inaccessible/{chr,blk} devices for mapping InaccessiblePaths */ - "-/run/systemd/inaccessible/chr\0" "rwm\0" - "-/run/systemd/inaccessible/blk\0" "rwm\0"; + "/run/systemd/inaccessible/chr\0" "rwm\0" + "/run/systemd/inaccessible/blk\0" "rwm\0"; const char *x, *y; @@ -1133,7 +1179,7 @@ static void cgroup_context_apply( else if ((val = startswith(a->path, "char-"))) (void) whitelist_major(prog, path, val, 'c', acc); else - log_unit_debug(u, "Ignoring device %s while writing cgroup attribute.", a->path); + log_unit_debug(u, "Ignoring device '%s' while writing cgroup attribute.", a->path); } r = cgroup_apply_device_bpf(u, prog, c->device_policy, c->device_allow); diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c index a118d83b1b..a686b64e66 100644 --- a/src/core/mount-setup.c +++ b/src/core/mount-setup.c @@ -460,20 +460,9 @@ int mount_setup(bool loaded_policy) { (void) mkdir_label("/run/systemd", 0755); (void) mkdir_label("/run/systemd/system", 0755); - /* Set up inaccessible (and empty) file nodes of all types */ - (void) mkdir_label("/run/systemd/inaccessible", 0000); - (void) mknod("/run/systemd/inaccessible/reg", S_IFREG | 0000, 0); - (void) mkdir_label("/run/systemd/inaccessible/dir", 0000); - (void) mkfifo("/run/systemd/inaccessible/fifo", 0000); - (void) mknod("/run/systemd/inaccessible/sock", S_IFSOCK | 0000, 0); - - /* The following two are likely to fail if we lack the privs for it (for example in an userns environment, if - * CAP_SYS_MKNOD is missing, or if a device node policy prohibit major/minor of 0 device nodes to be - * created). But that's entirely fine. Consumers of these files should carry fallback to use a different node - * then, for example /run/systemd/inaccessible/sock, which is close enough in behaviour and semantics for most - * uses. */ - (void) mknod("/run/systemd/inaccessible/chr", S_IFCHR | 0000, makedev(0, 0)); - (void) mknod("/run/systemd/inaccessible/blk", S_IFBLK | 0000, makedev(0, 0)); + /* Also create /run/systemd/inaccessible nodes, so that we always have something to mount inaccessible nodes + * from. */ + (void) make_inaccessible_nodes(NULL, UID_INVALID, GID_INVALID); return 0; } |