-rw-r--r--  Documentation/admin-guide/kernel-parameters.txt |  12
-rw-r--r--  Documentation/core-api/workqueue.rst            |  63
-rw-r--r--  include/linux/workqueue.h                       |   5
-rw-r--r--  kernel/workqueue.c                              | 110
-rw-r--r--  tools/workqueue/wq_dump.py                      |  15
5 files changed, 193 insertions, 12 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 2b89cbc39713..732c5c7e3fa5 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -7007,6 +7007,18 @@
 			The default value of this parameter is determined by
 			the config option CONFIG_WQ_POWER_EFFICIENT_DEFAULT.
 
+	workqueue.default_affinity_scope=
+			Select the default affinity scope to use for unbound
+			workqueues. Can be one of "cpu", "smt", "cache",
+			"numa" and "system". Default is "cache". For more
+			information, see the Affinity Scopes section in
+			Documentation/core-api/workqueue.rst.
+
+			This can be updated after boot through the matching
+			file under /sys/module/workqueue/parameters.
+			However, the changed default will only apply to
+			unbound workqueues created afterwards.
+
 	workqueue.debug_force_rr_cpu
 			Workqueue used to implicitly guarantee that work
 			items queued without explicit CPU specified are put
diff --git a/Documentation/core-api/workqueue.rst b/Documentation/core-api/workqueue.rst
index c9e46acd339b..56af317508c9 100644
--- a/Documentation/core-api/workqueue.rst
+++ b/Documentation/core-api/workqueue.rst
@@ -347,6 +347,51 @@ Guidelines
   level of locality in wq operations and work item execution.
 
 
+Affinity Scopes
+===============
+
+An unbound workqueue groups CPUs according to its affinity scope to improve
+cache locality. For example, if a workqueue is using the default affinity
+scope of "cache", it will group CPUs according to last level cache
+boundaries. A work item queued on the workqueue will be processed by a
+worker running on one of the CPUs which share the last level cache with the
+issuing CPU.
+
+Workqueue currently supports the following five affinity scopes.
+
+``cpu``
+  CPUs are not grouped. A work item issued on one CPU is processed by a
+  worker on the same CPU. This makes unbound workqueues behave as per-cpu
+  workqueues without concurrency management.
+
+``smt``
+  CPUs are grouped according to SMT boundaries. This usually means that the
+  logical threads of each physical CPU core are grouped together.
+
+``cache``
+  CPUs are grouped according to cache boundaries. Which specific cache
+  boundary is used is determined by the arch code. L3 is used in a lot of
+  cases. This is the default affinity scope.
+
+``numa``
+  CPUs are grouped according to NUMA boundaries.
+
+``system``
+  All CPUs are put in the same group. Workqueue makes no effort to process a
+  work item on a CPU close to the issuing CPU.
+
+The default affinity scope can be changed with the module parameter
+``workqueue.default_affinity_scope`` and a specific workqueue's affinity
+scope can be changed using ``apply_workqueue_attrs()``.
+
+If ``WQ_SYSFS`` is set, the workqueue will have the following affinity scope
+related interface files under its ``/sys/devices/virtual/workqueue/WQ_NAME/``
+directory.
+
+``affinity_scope``
+  Read to see the current affinity scope. Write to change.
+
+
 Examining Configuration
 =======================
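[Editor's note] The documentation above says a specific workqueue's scope can be
set with ``apply_workqueue_attrs()``, and that the system-wide default for
future unbound workqueues can be changed by writing a scope name to
/sys/module/workqueue/parameters/default_affinity_scope. As a rough in-kernel
sketch (not part of this patch; the helper, the workqueue name "my_wq", and the
minimal error handling are illustrative assumptions), selecting the "numa"
scope for one workqueue could look like: ::

	#include <linux/workqueue.h>

	/*
	 * Hypothetical helper, not in this patch: create an unbound
	 * workqueue pinned to the NUMA affinity scope instead of the
	 * "cache" default.
	 */
	static struct workqueue_struct *create_numa_scoped_wq(void)
	{
		struct workqueue_struct *wq;
		struct workqueue_attrs *attrs;
		int ret = -ENOMEM;

		wq = alloc_workqueue("my_wq", WQ_UNBOUND, 0);
		attrs = alloc_workqueue_attrs();
		if (wq && attrs) {
			attrs->affn_scope = WQ_AFFN_NUMA;
			ret = apply_workqueue_attrs(wq, attrs);
		}
		free_workqueue_attrs(attrs);	/* NULL-safe */

		if (ret && wq) {
			destroy_workqueue(wq);
			wq = NULL;
		}
		return wq;
	}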
@@ -358,6 +403,24 @@ configuration, worker pools and how workqueues map to the pools: ::
   ===============
   wq_unbound_cpumask=0000000f
 
+  CPU
+  nr_pods  4
+  pod_cpus [0]=00000001 [1]=00000002 [2]=00000004 [3]=00000008
+  pod_node [0]=0 [1]=0 [2]=1 [3]=1
+  cpu_pod  [0]=0 [1]=1 [2]=2 [3]=3
+
+  SMT
+  nr_pods  4
+  pod_cpus [0]=00000001 [1]=00000002 [2]=00000004 [3]=00000008
+  pod_node [0]=0 [1]=0 [2]=1 [3]=1
+  cpu_pod  [0]=0 [1]=1 [2]=2 [3]=3
+
+  CACHE (default)
+  nr_pods  2
+  pod_cpus [0]=00000003 [1]=0000000c
+  pod_node [0]=0 [1]=1
+  cpu_pod  [0]=0 [1]=0 [2]=1 [3]=1
+
   NUMA
   nr_pods  2
   pod_cpus [0]=00000003 [1]=0000000c
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 180491ee6706..568cfbc24bc0 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -126,12 +126,15 @@ struct rcu_work {
 };
 
 enum wq_affn_scope {
+	WQ_AFFN_CPU,			/* one pod per CPU */
+	WQ_AFFN_SMT,			/* one pod per SMT domain */
+	WQ_AFFN_CACHE,			/* one pod per LLC */
 	WQ_AFFN_NUMA,			/* one pod per NUMA node */
 	WQ_AFFN_SYSTEM,			/* one pod across the whole system */
 
 	WQ_AFFN_NR_TYPES,
 
-	WQ_AFFN_DFL = WQ_AFFN_NUMA,
+	WQ_AFFN_DFL = WQ_AFFN_CACHE,
 };
 
 /**
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index a2cc0432abd5..8e3a499c3f00 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -338,6 +338,15 @@ struct wq_pod_type {
 };
 
 static struct wq_pod_type wq_pod_types[WQ_AFFN_NR_TYPES];
+static enum wq_affn_scope wq_affn_dfl = WQ_AFFN_DFL;
+
+static const char *wq_affn_names[WQ_AFFN_NR_TYPES] = {
+	[WQ_AFFN_CPU]		= "cpu",
+	[WQ_AFFN_SMT]		= "smt",
+	[WQ_AFFN_CACHE]		= "cache",
+	[WQ_AFFN_NUMA]		= "numa",
+	[WQ_AFFN_SYSTEM]	= "system",
+};
 
 /*
  * Per-cpu work items which run for longer than the following threshold are
@@ -3664,7 +3673,7 @@ struct workqueue_attrs *alloc_workqueue_attrs(void)
 		goto fail;
 
 	cpumask_copy(attrs->cpumask, cpu_possible_mask);
-	attrs->affn_scope = WQ_AFFN_DFL;
+	attrs->affn_scope = wq_affn_dfl;
 	return attrs;
 fail:
 	free_workqueue_attrs(attrs);
@@ -5777,19 +5786,55 @@ out_unlock:
 	return ret;
 }
 
+static int parse_affn_scope(const char *val)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(wq_affn_names); i++) {
+		if (!strncasecmp(val, wq_affn_names[i], strlen(wq_affn_names[i])))
+			return i;
+	}
+	return -EINVAL;
+}
+
+static int wq_affn_dfl_set(const char *val, const struct kernel_param *kp)
+{
+	int affn;
+
+	affn = parse_affn_scope(val);
+	if (affn < 0)
+		return affn;
+
+	wq_affn_dfl = affn;
+	return 0;
+}
+
+static int wq_affn_dfl_get(char *buffer, const struct kernel_param *kp)
+{
+	return scnprintf(buffer, PAGE_SIZE, "%s\n", wq_affn_names[wq_affn_dfl]);
+}
+
+static const struct kernel_param_ops wq_affn_dfl_ops = {
+	.set	= wq_affn_dfl_set,
+	.get	= wq_affn_dfl_get,
+};
+
+module_param_cb(default_affinity_scope, &wq_affn_dfl_ops, NULL, 0644);
+
 #ifdef CONFIG_SYSFS
 /*
  * Workqueues with WQ_SYSFS flag set is visible to userland via
  * /sys/bus/workqueue/devices/WQ_NAME. All visible workqueues have the
  * following attributes.
  *
- * per_cpu	RO bool	: whether the workqueue is per-cpu or unbound
- * max_active	RW int	: maximum number of in-flight work items
+ * per_cpu		RO bool	: whether the workqueue is per-cpu or unbound
+ * max_active		RW int	: maximum number of in-flight work items
  *
  * Unbound workqueues have the following extra attributes.
  *
- * nice	RW int	: nice value of the workers
- * cpumask	RW mask	: bitmask of allowed CPUs for the workers
+ * nice			RW int	: nice value of the workers
+ * cpumask		RW mask	: bitmask of allowed CPUs for the workers
+ * affinity_scope	RW str	: worker CPU affinity scope (cpu, smt, cache, numa, system)
  */
 struct wq_device {
 	struct workqueue_struct *wq;
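[Editor's note] ``parse_affn_scope()`` above matches only a prefix of the
input, via ``strncasecmp()`` with the length of each known name. A plausible
reason (an inference, not stated in the patch) is that values written through
sysfs or ``echo`` arrive with a trailing newline, which an exact comparison
would reject. A small userspace model of the same logic illustrates the
behavior: ::

	/* Userspace model of the kernel's parse_affn_scope(), for
	 * illustration only. */
	#include <stdio.h>
	#include <string.h>
	#include <strings.h>

	static const char * const wq_affn_names[] = {
		"cpu", "smt", "cache", "numa", "system",
	};

	static int parse_affn_scope(const char *val)
	{
		int i;

		for (i = 0; i < (int)(sizeof(wq_affn_names) /
				      sizeof(wq_affn_names[0])); i++) {
			if (!strncasecmp(val, wq_affn_names[i],
					 strlen(wq_affn_names[i])))
				return i;
		}
		return -1;	/* the kernel returns -EINVAL */
	}

	int main(void)
	{
		printf("%d\n", parse_affn_scope("cache\n")); /* 2: newline tolerated */
		printf("%d\n", parse_affn_scope("NUMA"));    /* 3: case-insensitive */
		printf("%d\n", parse_affn_scope("bogus"));   /* -1: rejected */
		return 0;
	}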
@@ -5932,9 +5977,47 @@ out_unlock:
 	return ret ?: count;
 }
 
+static ssize_t wq_affn_scope_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct workqueue_struct *wq = dev_to_wq(dev);
+	int written;
+
+	mutex_lock(&wq->mutex);
+	written = scnprintf(buf, PAGE_SIZE, "%s\n",
+			    wq_affn_names[wq->unbound_attrs->affn_scope]);
+	mutex_unlock(&wq->mutex);
+
+	return written;
+}
+
+static ssize_t wq_affn_scope_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t count)
+{
+	struct workqueue_struct *wq = dev_to_wq(dev);
+	struct workqueue_attrs *attrs;
+	int affn, ret = -ENOMEM;
+
+	affn = parse_affn_scope(buf);
+	if (affn < 0)
+		return affn;
+
+	apply_wqattrs_lock();
+	attrs = wq_sysfs_prep_attrs(wq);
+	if (attrs) {
+		attrs->affn_scope = affn;
+		ret = apply_workqueue_attrs_locked(wq, attrs);
+	}
+	apply_wqattrs_unlock();
+	free_workqueue_attrs(attrs);
+	return ret ?: count;
+}
+
 static struct device_attribute wq_sysfs_unbound_attrs[] = {
 	__ATTR(nice, 0644, wq_nice_show, wq_nice_store),
 	__ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store),
+	__ATTR(affinity_scope, 0644, wq_affn_scope_show, wq_affn_scope_store),
 	__ATTR_NULL,
 };
 
@@ -6537,6 +6620,20 @@ static void __init init_pod_type(struct wq_pod_type *pt,
 	}
 }
 
+static bool __init cpus_dont_share(int cpu0, int cpu1)
+{
+	return false;
+}
+
+static bool __init cpus_share_smt(int cpu0, int cpu1)
+{
+#ifdef CONFIG_SCHED_SMT
+	return cpumask_test_cpu(cpu0, cpu_smt_mask(cpu1));
+#else
+	return false;
+#endif
+}
+
 static bool __init cpus_share_numa(int cpu0, int cpu1)
 {
 	return cpu_to_node(cpu0) == cpu_to_node(cpu1);
@@ -6554,6 +6651,9 @@ void __init workqueue_init_topology(void)
 	struct workqueue_struct *wq;
 	int cpu;
 
+	init_pod_type(&wq_pod_types[WQ_AFFN_CPU], cpus_dont_share);
+	init_pod_type(&wq_pod_types[WQ_AFFN_SMT], cpus_share_smt);
+	init_pod_type(&wq_pod_types[WQ_AFFN_CACHE], cpus_share_cache);
 	init_pod_type(&wq_pod_types[WQ_AFFN_NUMA], cpus_share_numa);
 
 	mutex_lock(&wq_pool_mutex);
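[Editor's note] ``workqueue_init_topology()`` now builds one ``wq_pod_type``
per scope by handing ``init_pod_type()`` a pairwise "do these two CPUs share
X?" callback. The following userspace model is a simplified assumption of that
grouping logic (the real ``init_pod_type()`` operates on cpumasks); with a toy
two-CPUs-per-LLC predicate it reproduces the CACHE example from the
documentation above (``nr_pods 2``, ``cpu_pod [0]=0 [1]=0 [2]=1 [3]=1``): ::

	#include <stdio.h>
	#include <stdbool.h>

	#define NR_CPUS 4

	/* Toy predicate (assumption): CPUs 0-1 and 2-3 each share an LLC. */
	static bool cpus_share_cache(int cpu0, int cpu1)
	{
		return cpu0 / 2 == cpu1 / 2;
	}

	int main(void)
	{
		int cpu_pod[NR_CPUS];
		int nr_pods = 0;
		int cpu, prev;

		for (cpu = 0; cpu < NR_CPUS; cpu++) {
			/* Tentatively open a new pod for this CPU... */
			cpu_pod[cpu] = nr_pods;
			/* ...unless an earlier CPU already shares the resource. */
			for (prev = 0; prev < cpu; prev++) {
				if (cpus_share_cache(cpu, prev)) {
					cpu_pod[cpu] = cpu_pod[prev];
					break;
				}
			}
			if (cpu_pod[cpu] == nr_pods)
				nr_pods++;
		}

		printf("nr_pods  %d\ncpu_pod ", nr_pods);
		for (cpu = 0; cpu < NR_CPUS; cpu++)
			printf(" [%d]=%d", cpu, cpu_pod[cpu]);
		printf("\n");
		return 0;
	}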
diff --git a/tools/workqueue/wq_dump.py b/tools/workqueue/wq_dump.py
index ddd0bb4395ea..43ab71a193b8 100644
--- a/tools/workqueue/wq_dump.py
+++ b/tools/workqueue/wq_dump.py
@@ -78,11 +78,16 @@ worker_pool_idr         = prog['worker_pool_idr']
 workqueues              = prog['workqueues']
 wq_unbound_cpumask      = prog['wq_unbound_cpumask']
 wq_pod_types            = prog['wq_pod_types']
+wq_affn_dfl             = prog['wq_affn_dfl']
+wq_affn_names           = prog['wq_affn_names']
 
 WQ_UNBOUND              = prog['WQ_UNBOUND']
 WQ_ORDERED              = prog['__WQ_ORDERED']
 WQ_MEM_RECLAIM          = prog['WQ_MEM_RECLAIM']
 
+WQ_AFFN_CPU             = prog['WQ_AFFN_CPU']
+WQ_AFFN_SMT             = prog['WQ_AFFN_SMT']
+WQ_AFFN_CACHE           = prog['WQ_AFFN_CACHE']
 WQ_AFFN_NUMA            = prog['WQ_AFFN_NUMA']
 WQ_AFFN_SYSTEM          = prog['WQ_AFFN_SYSTEM']
 
@@ -109,12 +114,10 @@ def print_pod_type(pt):
             print(f'  [{cpu}]={pt.cpu_pod[cpu].value_()}', end='')
     print('')
 
-print('')
-print('NUMA')
-print_pod_type(wq_pod_types[WQ_AFFN_NUMA])
-print('')
-print('SYSTEM')
-print_pod_type(wq_pod_types[WQ_AFFN_SYSTEM])
+for affn in [WQ_AFFN_CPU, WQ_AFFN_SMT, WQ_AFFN_CACHE, WQ_AFFN_NUMA, WQ_AFFN_SYSTEM]:
+    print('')
+    print(f'{wq_affn_names[affn].string_().decode().upper()}{" (default)" if affn == wq_affn_dfl else ""}')
+    print_pod_type(wq_pod_types[affn])
 
 print('')
 print('Worker Pools')