diff options
author | Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl> | 2020-03-18 13:35:58 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-03-18 13:35:58 +0100 |
commit | 8c357762c7cb5ee9cc699f7f7aad980d6cfe7efb (patch) | |
tree | 866a52607c3d251f1de995c3931d7af032e77539 /src | |
parent | sd-bus: sd_bus_call/sd_bus_call_async_docs + cleanups. (diff) | |
parent | core: add support for setting CPUAffinity= to special "numa" value (diff) | |
download | systemd-8c357762c7cb5ee9cc699f7f7aad980d6cfe7efb.tar.xz systemd-8c357762c7cb5ee9cc699f7f7aad980d6cfe7efb.zip |
Merge pull request #14749 from msekletar/cpu-aff-numa-v3
core: add support for setting CPUAffinity= to special "numa" value
Diffstat (limited to 'src')
-rw-r--r-- | src/core/dbus-execute.c | 30 | ||||
-rw-r--r-- | src/core/execute.c | 46 | ||||
-rw-r--r-- | src/core/execute.h | 4 | ||||
-rw-r--r-- | src/core/load-fragment.c | 14 | ||||
-rw-r--r-- | src/shared/bus-unit-util.c | 9 | ||||
-rw-r--r-- | src/shared/cpu-set-util.c | 91 | ||||
-rw-r--r-- | src/shared/cpu-set-util.h | 27 | ||||
-rw-r--r-- | src/shared/meson.build | 2 | ||||
-rw-r--r-- | src/shared/numa-util.c | 135 | ||||
-rw-r--r-- | src/shared/numa-util.h | 33 | ||||
-rw-r--r-- | src/systemctl/systemctl.c | 1 | ||||
-rw-r--r-- | src/test/test-cpu-set-util.c | 6 |
12 files changed, 275 insertions, 123 deletions
diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index d8ba3e5d92..e8be76e315 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -56,6 +56,8 @@ static BUS_DEFINE_PROPERTY_GET2(property_get_ioprio_priority, "i", ExecContext, static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_empty_string, "s", NULL); static BUS_DEFINE_PROPERTY_GET_REF(property_get_syslog_level, "i", int, LOG_PRI); static BUS_DEFINE_PROPERTY_GET_REF(property_get_syslog_facility, "i", int, LOG_FAC); +static BUS_DEFINE_PROPERTY_GET(property_get_cpu_affinity_from_numa, "b", ExecContext, exec_context_get_cpu_affinity_from_numa); + static int property_get_environment_files( sd_bus *bus, @@ -213,6 +215,7 @@ static int property_get_cpu_affinity( sd_bus_error *error) { ExecContext *c = userdata; + _cleanup_(cpu_set_reset) CPUSet s = {}; _cleanup_free_ uint8_t *array = NULL; size_t allocated; @@ -220,7 +223,16 @@ static int property_get_cpu_affinity( assert(reply); assert(c); - (void) cpu_set_to_dbus(&c->cpu_set, &array, &allocated); + if (c->cpu_affinity_from_numa) { + int r; + + r = numa_to_cpu_set(&c->numa_policy, &s); + if (r < 0) + return r; + } + + (void) cpu_set_to_dbus(c->cpu_affinity_from_numa ? &s : &c->cpu_set, &array, &allocated); + return sd_bus_message_append_array(reply, 'y', array, allocated); } @@ -741,6 +753,7 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("CPUSchedulingPolicy", "i", property_get_cpu_sched_policy, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("CPUSchedulingPriority", "i", property_get_cpu_sched_priority, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("CPUAffinity", "ay", property_get_cpu_affinity, 0, SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("CPUAffinityFromNUMA", "b", property_get_cpu_affinity_from_numa, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("NUMAPolicy", "i", property_get_numa_policy, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("NUMAMask", "ay", property_get_numa_mask, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("TimerSlackNSec", "t", property_get_timer_slack_nsec, 0, SD_BUS_VTABLE_PROPERTY_CONST), @@ -1770,6 +1783,20 @@ int bus_exec_context_set_transient_property( return 1; + } else if (streq(name, "CPUAffinityFromNUMA")) { + int q; + + r = sd_bus_message_read_basic(message, 'b', &q); + if (r < 0) + return r; + + if (!UNIT_WRITE_FLAGS_NOOP(flags)) { + c->cpu_affinity_from_numa = q; + unit_write_settingf(u, flags, name, "%s=%s", "CPUAffinity", "numa"); + } + + return 1; + } else if (streq(name, "NUMAPolicy")) { int32_t type; @@ -1784,6 +1811,7 @@ int bus_exec_context_set_transient_property( c->numa_policy.type = type; return 1; + } else if (streq(name, "Nice")) { int32_t q; diff --git a/src/core/execute.c b/src/core/execute.c index 00a2f2e17e..8e1e77b4b2 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -3021,6 +3021,33 @@ static int exec_parameters_get_cgroup_path(const ExecParameters *params, char ** return using_subcgroup; } +static int exec_context_cpu_affinity_from_numa(const ExecContext *c, CPUSet *ret) { + _cleanup_(cpu_set_reset) CPUSet s = {}; + int r; + + assert(c); + assert(ret); + + if (!c->numa_policy.nodes.set) { + log_debug("Can't derive CPU affinity mask from NUMA mask because NUMA mask is not set, ignoring"); + return 0; + } + + r = numa_to_cpu_set(&c->numa_policy, &s); + if (r < 0) + return r; + + cpu_set_reset(ret); + + return cpu_set_add_all(ret, &s); +} + +bool exec_context_get_cpu_affinity_from_numa(const ExecContext *c) { + assert(c); + + return c->cpu_affinity_from_numa; +} + static int exec_child( Unit *unit, const ExecCommand *command, @@ -3318,11 +3345,26 @@ static int exec_child( } } - if (context->cpu_set.set) - if (sched_setaffinity(0, context->cpu_set.allocated, context->cpu_set.set) < 0) { + if (context->cpu_affinity_from_numa || context->cpu_set.set) { + _cleanup_(cpu_set_reset) CPUSet converted_cpu_set = {}; + const CPUSet *cpu_set; + + if (context->cpu_affinity_from_numa) { + r = exec_context_cpu_affinity_from_numa(context, &converted_cpu_set); + if (r < 0) { + *exit_status = EXIT_CPUAFFINITY; + return log_unit_error_errno(unit, r, "Failed to derive CPU affinity mask from NUMA mask: %m"); + } + + cpu_set = &converted_cpu_set; + } else + cpu_set = &context->cpu_set; + + if (sched_setaffinity(0, cpu_set->allocated, cpu_set->set) < 0) { *exit_status = EXIT_CPUAFFINITY; return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m"); } + } if (mpol_is_valid(numa_policy_get_type(&context->numa_policy))) { r = apply_numa_policy(&context->numa_policy); diff --git a/src/core/execute.h b/src/core/execute.h index 09c1510aaf..4baf5b1a40 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -21,6 +21,7 @@ typedef struct Manager Manager; #include "missing_resource.h" #include "namespace.h" #include "nsflags.h" +#include "numa-util.h" #include "time-util.h" #define EXEC_STDIN_DATA_MAX (64U*1024U*1024U) @@ -181,6 +182,7 @@ struct ExecContext { CPUSet cpu_set; NUMAPolicy numa_policy; + bool cpu_affinity_from_numa; ExecInput std_input; ExecOutput std_output; @@ -405,6 +407,8 @@ void exec_runtime_vacuum(Manager *m); void exec_params_clear(ExecParameters *p); +bool exec_context_get_cpu_affinity_from_numa(const ExecContext *c); + const char* exec_output_to_string(ExecOutput i) _const_; ExecOutput exec_output_from_string(const char *s) _pure_; diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 71a9873da4..646364eb89 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -1330,13 +1330,25 @@ int config_parse_exec_cpu_affinity(const char *unit, void *userdata) { ExecContext *c = data; + int r; assert(filename); assert(lvalue); assert(rvalue); assert(data); - return parse_cpu_set_extend(rvalue, &c->cpu_set, true, unit, filename, line, lvalue); + if (streq(rvalue, "numa")) { + c->cpu_affinity_from_numa = true; + cpu_set_reset(&c->cpu_set); + + return 0; + } + + r = parse_cpu_set_extend(rvalue, &c->cpu_set, true, unit, filename, line, lvalue); + if (r >= 0) + c->cpu_affinity_from_numa = false; + + return r; } int config_parse_capability_set( diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index 28d85944a8..a30876c1a1 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -21,6 +21,7 @@ #include "missing_fs.h" #include "mountpoint-util.h" #include "nsflags.h" +#include "numa-util.h" #include "parse-util.h" #include "process-util.h" #include "rlimit-util.h" @@ -28,6 +29,7 @@ #include "signal-util.h" #include "socket-util.h" #include "sort-util.h" +#include "stdio-util.h" #include "string-util.h" #include "syslog-util.h" #include "terminal-util.h" @@ -1102,6 +1104,13 @@ static int bus_append_execute_property(sd_bus_message *m, const char *field, con _cleanup_free_ uint8_t *array = NULL; size_t allocated; + if (eq && streq(eq, "numa")) { + r = sd_bus_message_append(m, "(sv)", "CPUAffinityFromNUMA", "b", true); + if (r < 0) + return bus_log_create_error(r); + return r; + } + r = parse_cpu_set(eq, &cpuset); if (r < 0) return log_error_errno(r, "Failed to parse %s value: %s", field, eq); diff --git a/src/shared/cpu-set-util.c b/src/shared/cpu-set-util.c index 219314ef58..9b9238362f 100644 --- a/src/shared/cpu-set-util.c +++ b/src/shared/cpu-set-util.c @@ -14,11 +14,9 @@ #include "log.h" #include "macro.h" #include "memory-util.h" -#include "missing_syscall.h" #include "parse-util.h" #include "stat-util.h" #include "string-util.h" -#include "string-table.h" #include "strv.h" #include "util.h" @@ -133,7 +131,7 @@ int cpu_set_add_all(CPUSet *a, const CPUSet *b) { return r; } - return 0; + return 1; } int parse_cpu_set_full( @@ -218,7 +216,7 @@ int parse_cpu_set_extend( if (!old->set) { *old = cpuset; cpuset = (CPUSet) {}; - return 0; + return 1; } return cpu_set_add_all(old, &cpuset); @@ -295,88 +293,3 @@ int cpu_set_from_dbus(const uint8_t *bits, size_t size, CPUSet *set) { s = (CPUSet) {}; return 0; } - -bool numa_policy_is_valid(const NUMAPolicy *policy) { - assert(policy); - - if (!mpol_is_valid(numa_policy_get_type(policy))) - return false; - - if (!policy->nodes.set && - !IN_SET(numa_policy_get_type(policy), MPOL_DEFAULT, MPOL_LOCAL, MPOL_PREFERRED)) - return false; - - if (policy->nodes.set && - numa_policy_get_type(policy) == MPOL_PREFERRED && - CPU_COUNT_S(policy->nodes.allocated, policy->nodes.set) != 1) - return false; - - return true; -} - -static int numa_policy_to_mempolicy(const NUMAPolicy *policy, unsigned long *ret_maxnode, unsigned long **ret_nodes) { - unsigned node, bits = 0, ulong_bits; - _cleanup_free_ unsigned long *out = NULL; - - assert(policy); - assert(ret_maxnode); - assert(ret_nodes); - - if (IN_SET(numa_policy_get_type(policy), MPOL_DEFAULT, MPOL_LOCAL) || - (numa_policy_get_type(policy) == MPOL_PREFERRED && !policy->nodes.set)) { - *ret_nodes = NULL; - *ret_maxnode = 0; - return 0; - } - - bits = policy->nodes.allocated * 8; - ulong_bits = sizeof(unsigned long) * 8; - - out = new0(unsigned long, DIV_ROUND_UP(policy->nodes.allocated, sizeof(unsigned long))); - if (!out) - return -ENOMEM; - - /* We don't make any assumptions about internal type libc is using to store NUMA node mask. - Hence we need to convert the node mask to the representation expected by set_mempolicy() */ - for (node = 0; node < bits; node++) - if (CPU_ISSET_S(node, policy->nodes.allocated, policy->nodes.set)) - out[node / ulong_bits] |= 1ul << (node % ulong_bits); - - *ret_nodes = TAKE_PTR(out); - *ret_maxnode = bits + 1; - return 0; -} - -int apply_numa_policy(const NUMAPolicy *policy) { - int r; - _cleanup_free_ unsigned long *nodes = NULL; - unsigned long maxnode; - - assert(policy); - - if (get_mempolicy(NULL, NULL, 0, 0, 0) < 0 && errno == ENOSYS) - return -EOPNOTSUPP; - - if (!numa_policy_is_valid(policy)) - return -EINVAL; - - r = numa_policy_to_mempolicy(policy, &maxnode, &nodes); - if (r < 0) - return r; - - r = set_mempolicy(numa_policy_get_type(policy), nodes, maxnode); - if (r < 0) - return -errno; - - return 0; -} - -static const char* const mpol_table[] = { - [MPOL_DEFAULT] = "default", - [MPOL_PREFERRED] = "preferred", - [MPOL_BIND] = "bind", - [MPOL_INTERLEAVE] = "interleave", - [MPOL_LOCAL] = "local", -}; - -DEFINE_STRING_TABLE_LOOKUP(mpol, int); diff --git a/src/shared/cpu-set-util.h b/src/shared/cpu-set-util.h index 27812dfd59..a60d4ec41b 100644 --- a/src/shared/cpu-set-util.h +++ b/src/shared/cpu-set-util.h @@ -49,30 +49,3 @@ int cpu_set_to_dbus(const CPUSet *set, uint8_t **ret, size_t *allocated); int cpu_set_from_dbus(const uint8_t *bits, size_t size, CPUSet *set); int cpus_in_affinity_mask(void); - -static inline bool mpol_is_valid(int t) { - return t >= MPOL_DEFAULT && t <= MPOL_LOCAL; -} - -typedef struct NUMAPolicy { - /* Always use numa_policy_get_type() to read the value */ - int type; - CPUSet nodes; -} NUMAPolicy; - -bool numa_policy_is_valid(const NUMAPolicy *p); - -static inline int numa_policy_get_type(const NUMAPolicy *p) { - return p->type < 0 ? (p->nodes.set ? MPOL_PREFERRED : -1) : p->type; -} - -static inline void numa_policy_reset(NUMAPolicy *p) { - assert(p); - cpu_set_reset(&p->nodes); - p->type = -1; -} - -int apply_numa_policy(const NUMAPolicy *policy); - -const char* mpol_to_string(int i) _const_; -int mpol_from_string(const char *s) _pure_; diff --git a/src/shared/meson.build b/src/shared/meson.build index fa080f8e62..94174347a1 100644 --- a/src/shared/meson.build +++ b/src/shared/meson.build @@ -147,6 +147,8 @@ shared_sources = files(''' nscd-flush.h nsflags.c nsflags.h + numa-util.c + numa-util.h openssl-util.h os-util.c os-util.h diff --git a/src/shared/numa-util.c b/src/shared/numa-util.c new file mode 100644 index 0000000000..187992dc69 --- /dev/null +++ b/src/shared/numa-util.c @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <sched.h> + +#include "alloc-util.h" +#include "cpu-set-util.h" +#include "fileio.h" +#include "macro.h" +#include "missing_syscall.h" +#include "numa-util.h" +#include "stdio-util.h" +#include "string-table.h" + +bool numa_policy_is_valid(const NUMAPolicy *policy) { + assert(policy); + + if (!mpol_is_valid(numa_policy_get_type(policy))) + return false; + + if (!policy->nodes.set && + !IN_SET(numa_policy_get_type(policy), MPOL_DEFAULT, MPOL_LOCAL, MPOL_PREFERRED)) + return false; + + if (policy->nodes.set && + numa_policy_get_type(policy) == MPOL_PREFERRED && + CPU_COUNT_S(policy->nodes.allocated, policy->nodes.set) != 1) + return false; + + return true; +} + +static int numa_policy_to_mempolicy(const NUMAPolicy *policy, unsigned long *ret_maxnode, unsigned long **ret_nodes) { + unsigned node, bits = 0, ulong_bits; + _cleanup_free_ unsigned long *out = NULL; + + assert(policy); + assert(ret_maxnode); + assert(ret_nodes); + + if (IN_SET(numa_policy_get_type(policy), MPOL_DEFAULT, MPOL_LOCAL) || + (numa_policy_get_type(policy) == MPOL_PREFERRED && !policy->nodes.set)) { + *ret_nodes = NULL; + *ret_maxnode = 0; + return 0; + } + + bits = policy->nodes.allocated * 8; + ulong_bits = sizeof(unsigned long) * 8; + + out = new0(unsigned long, DIV_ROUND_UP(policy->nodes.allocated, sizeof(unsigned long))); + if (!out) + return -ENOMEM; + + /* We don't make any assumptions about internal type libc is using to store NUMA node mask. + Hence we need to convert the node mask to the representation expected by set_mempolicy() */ + for (node = 0; node < bits; node++) + if (CPU_ISSET_S(node, policy->nodes.allocated, policy->nodes.set)) + out[node / ulong_bits] |= 1ul << (node % ulong_bits); + + *ret_nodes = TAKE_PTR(out); + *ret_maxnode = bits + 1; + return 0; +} + +int apply_numa_policy(const NUMAPolicy *policy) { + int r; + _cleanup_free_ unsigned long *nodes = NULL; + unsigned long maxnode; + + assert(policy); + + if (get_mempolicy(NULL, NULL, 0, 0, 0) < 0 && errno == ENOSYS) + return -EOPNOTSUPP; + + if (!numa_policy_is_valid(policy)) + return -EINVAL; + + r = numa_policy_to_mempolicy(policy, &maxnode, &nodes); + if (r < 0) + return r; + + r = set_mempolicy(numa_policy_get_type(policy), nodes, maxnode); + if (r < 0) + return -errno; + + return 0; +} + +int numa_to_cpu_set(const NUMAPolicy *policy, CPUSet *ret) { + int r; + size_t i; + _cleanup_(cpu_set_reset) CPUSet s = {}; + + assert(policy); + assert(ret); + + for (i = 0; i < policy->nodes.allocated * 8; i++) { + _cleanup_free_ char *l = NULL; + char p[STRLEN("/sys/devices/system/node/node//cpulist") + DECIMAL_STR_MAX(size_t) + 1]; + _cleanup_(cpu_set_reset) CPUSet part = {}; + + if (!CPU_ISSET_S(i, policy->nodes.allocated, policy->nodes.set)) + continue; + + xsprintf(p, "/sys/devices/system/node/node%zu/cpulist", i); + + r = read_one_line_file(p, &l); + if (r < 0) + return r; + + r = parse_cpu_set(l, &part); + if (r < 0) + return r; + + r = cpu_set_add_all(&s, &part); + if (r < 0) + return r; + } + + *ret = s; + s = (CPUSet) {}; + + return 0; +} + +static const char* const mpol_table[] = { + [MPOL_DEFAULT] = "default", + [MPOL_PREFERRED] = "preferred", + [MPOL_BIND] = "bind", + [MPOL_INTERLEAVE] = "interleave", + [MPOL_LOCAL] = "local", +}; + +DEFINE_STRING_TABLE_LOOKUP(mpol, int); diff --git a/src/shared/numa-util.h b/src/shared/numa-util.h new file mode 100644 index 0000000000..c99178903b --- /dev/null +++ b/src/shared/numa-util.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include "cpu-set-util.h" +#include "missing_syscall.h" + +static inline bool mpol_is_valid(int t) { + return t >= MPOL_DEFAULT && t <= MPOL_LOCAL; +} + +typedef struct NUMAPolicy { + /* Always use numa_policy_get_type() to read the value */ + int type; + CPUSet nodes; +} NUMAPolicy; + +bool numa_policy_is_valid(const NUMAPolicy *p); + +static inline int numa_policy_get_type(const NUMAPolicy *p) { + return p->type < 0 ? (p->nodes.set ? MPOL_PREFERRED : -1) : p->type; +} + +static inline void numa_policy_reset(NUMAPolicy *p) { + assert(p); + cpu_set_reset(&p->nodes); + p->type = -1; +} + +int apply_numa_policy(const NUMAPolicy *policy); +int numa_to_cpu_set(const NUMAPolicy *policy, CPUSet *set); + +const char* mpol_to_string(int i) _const_; +int mpol_from_string(const char *s) _pure_; diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c index ba3fe9ec2f..f8b0adc334 100644 --- a/src/systemctl/systemctl.c +++ b/src/systemctl/systemctl.c @@ -58,6 +58,7 @@ #include "main-func.h" #include "memory-util.h" #include "mkdir.h" +#include "numa-util.h" #include "pager.h" #include "parse-util.h" #include "path-lookup.h" diff --git a/src/test/test-cpu-set-util.c b/src/test/test-cpu-set-util.c index e1dd2eb32b..450e19e06f 100644 --- a/src/test/test-cpu-set-util.c +++ b/src/test/test-cpu-set-util.c @@ -216,12 +216,12 @@ static void test_parse_cpu_set_extend(void) { log_info("/* %s */", __func__); - assert_se(parse_cpu_set_extend("1 3", &c, true, NULL, "fake", 1, "CPUAffinity") == 0); + assert_se(parse_cpu_set_extend("1 3", &c, true, NULL, "fake", 1, "CPUAffinity") == 1); assert_se(CPU_COUNT_S(c.allocated, c.set) == 2); assert_se(s1 = cpu_set_to_string(&c)); log_info("cpu_set_to_string: %s", s1); - assert_se(parse_cpu_set_extend("4", &c, true, NULL, "fake", 1, "CPUAffinity") == 0); + assert_se(parse_cpu_set_extend("4", &c, true, NULL, "fake", 1, "CPUAffinity") == 1); assert_se(CPU_COUNT_S(c.allocated, c.set) == 3); assert_se(s2 = cpu_set_to_string(&c)); log_info("cpu_set_to_string: %s", s2); @@ -238,7 +238,7 @@ static void test_cpu_set_to_from_dbus(void) { log_info("/* %s */", __func__); - assert_se(parse_cpu_set_extend("1 3 8 100-200", &c, true, NULL, "fake", 1, "CPUAffinity") == 0); + assert_se(parse_cpu_set_extend("1 3 8 100-200", &c, true, NULL, "fake", 1, "CPUAffinity") == 1); assert_se(s = cpu_set_to_string(&c)); log_info("cpu_set_to_string: %s", s); assert_se(CPU_COUNT_S(c.allocated, c.set) == 104); |