summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt12
-rw-r--r--Documentation/core-api/workqueue.rst63
-rw-r--r--include/linux/workqueue.h5
-rw-r--r--kernel/workqueue.c110
-rw-r--r--tools/workqueue/wq_dump.py15
5 files changed, 193 insertions, 12 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 2b89cbc39713..732c5c7e3fa5 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -7007,6 +7007,18 @@
The default value of this parameter is determined by
the config option CONFIG_WQ_POWER_EFFICIENT_DEFAULT.
+ workqueue.default_affinity_scope=
+ Select the default affinity scope to use for unbound
+ workqueues. Can be one of "cpu", "smt", "cache",
+ "numa" and "system". Default is "cache". For more
+ information, see the Affinity Scopes section in
+ Documentation/core-api/workqueue.rst.
+
+ This can be updated after boot through the matching
+ file under /sys/module/workqueue/parameters.
+ However, the changed default will only apply to
+ unbound workqueues created afterwards.
+
workqueue.debug_force_rr_cpu
Workqueue used to implicitly guarantee that work
items queued without explicit CPU specified are put
diff --git a/Documentation/core-api/workqueue.rst b/Documentation/core-api/workqueue.rst
index c9e46acd339b..56af317508c9 100644
--- a/Documentation/core-api/workqueue.rst
+++ b/Documentation/core-api/workqueue.rst
@@ -347,6 +347,51 @@ Guidelines
level of locality in wq operations and work item execution.
+Affinity Scopes
+===============
+
+An unbound workqueue groups CPUs according to its affinity scope to improve
+cache locality. For example, if a workqueue is using the default affinity
+scope of "cache", it will group CPUs according to last level cache
+boundaries. A work item queued on the workqueue will be processed by a
+worker running on one of the CPUs which share the last level cache with the
+issuing CPU.
+
+Workqueue currently supports the following five affinity scopes.
+
+``cpu``
+ CPUs are not grouped. A work item issued on one CPU is processed by a
+ worker on the same CPU. This makes unbound workqueues behave as per-cpu
+ workqueues without concurrency management.
+
+``smt``
+ CPUs are grouped according to SMT boundaries. This usually means that the
+ logical threads of each physical CPU core are grouped together.
+
+``cache``
+ CPUs are grouped according to cache boundaries. Which specific cache
+ boundary is used is determined by the arch code. L3 is used in a lot of
+ cases. This is the default affinity scope.
+
+``numa``
+ CPUs are grouped according to NUMA bounaries.
+
+``system``
+ All CPUs are put in the same group. Workqueue makes no effort to process a
+ work item on a CPU close to the issuing CPU.
+
+The default affinity scope can be changed with the module parameter
+``workqueue.default_affinity_scope`` and a specific workqueue's affinity
+scope can be changed using ``apply_workqueue_attrs()``.
+
+If ``WQ_SYSFS`` is set, the workqueue will have the following affinity scope
+related interface files under its ``/sys/devices/virtual/WQ_NAME/``
+directory.
+
+``affinity_scope``
+ Read to see the current affinity scope. Write to change.
+
+
Examining Configuration
=======================
@@ -358,6 +403,24 @@ configuration, worker pools and how workqueues map to the pools: ::
===============
wq_unbound_cpumask=0000000f
+ CPU
+ nr_pods 4
+ pod_cpus [0]=00000001 [1]=00000002 [2]=00000004 [3]=00000008
+ pod_node [0]=0 [1]=0 [2]=1 [3]=1
+ cpu_pod [0]=0 [1]=1 [2]=2 [3]=3
+
+ SMT
+ nr_pods 4
+ pod_cpus [0]=00000001 [1]=00000002 [2]=00000004 [3]=00000008
+ pod_node [0]=0 [1]=0 [2]=1 [3]=1
+ cpu_pod [0]=0 [1]=1 [2]=2 [3]=3
+
+ CACHE (default)
+ nr_pods 2
+ pod_cpus [0]=00000003 [1]=0000000c
+ pod_node [0]=0 [1]=1
+ cpu_pod [0]=0 [1]=0 [2]=1 [3]=1
+
NUMA
nr_pods 2
pod_cpus [0]=00000003 [1]=0000000c
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 180491ee6706..568cfbc24bc0 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -126,12 +126,15 @@ struct rcu_work {
};
enum wq_affn_scope {
+ WQ_AFFN_CPU, /* one pod per CPU */
+ WQ_AFFN_SMT, /* one pod poer SMT */
+ WQ_AFFN_CACHE, /* one pod per LLC */
WQ_AFFN_NUMA, /* one pod per NUMA node */
WQ_AFFN_SYSTEM, /* one pod across the whole system */
WQ_AFFN_NR_TYPES,
- WQ_AFFN_DFL = WQ_AFFN_NUMA,
+ WQ_AFFN_DFL = WQ_AFFN_CACHE,
};
/**
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index a2cc0432abd5..8e3a499c3f00 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -338,6 +338,15 @@ struct wq_pod_type {
};
static struct wq_pod_type wq_pod_types[WQ_AFFN_NR_TYPES];
+static enum wq_affn_scope wq_affn_dfl = WQ_AFFN_DFL;
+
+static const char *wq_affn_names[WQ_AFFN_NR_TYPES] = {
+ [WQ_AFFN_CPU] = "cpu",
+ [WQ_AFFN_SMT] = "smt",
+ [WQ_AFFN_CACHE] = "cache",
+ [WQ_AFFN_NUMA] = "numa",
+ [WQ_AFFN_SYSTEM] = "system",
+};
/*
* Per-cpu work items which run for longer than the following threshold are
@@ -3664,7 +3673,7 @@ struct workqueue_attrs *alloc_workqueue_attrs(void)
goto fail;
cpumask_copy(attrs->cpumask, cpu_possible_mask);
- attrs->affn_scope = WQ_AFFN_DFL;
+ attrs->affn_scope = wq_affn_dfl;
return attrs;
fail:
free_workqueue_attrs(attrs);
@@ -5777,19 +5786,55 @@ out_unlock:
return ret;
}
+static int parse_affn_scope(const char *val)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(wq_affn_names); i++) {
+ if (!strncasecmp(val, wq_affn_names[i], strlen(wq_affn_names[i])))
+ return i;
+ }
+ return -EINVAL;
+}
+
+static int wq_affn_dfl_set(const char *val, const struct kernel_param *kp)
+{
+ int affn;
+
+ affn = parse_affn_scope(val);
+ if (affn < 0)
+ return affn;
+
+ wq_affn_dfl = affn;
+ return 0;
+}
+
+static int wq_affn_dfl_get(char *buffer, const struct kernel_param *kp)
+{
+ return scnprintf(buffer, PAGE_SIZE, "%s\n", wq_affn_names[wq_affn_dfl]);
+}
+
+static const struct kernel_param_ops wq_affn_dfl_ops = {
+ .set = wq_affn_dfl_set,
+ .get = wq_affn_dfl_get,
+};
+
+module_param_cb(default_affinity_scope, &wq_affn_dfl_ops, NULL, 0644);
+
#ifdef CONFIG_SYSFS
/*
* Workqueues with WQ_SYSFS flag set is visible to userland via
* /sys/bus/workqueue/devices/WQ_NAME. All visible workqueues have the
* following attributes.
*
- * per_cpu RO bool : whether the workqueue is per-cpu or unbound
- * max_active RW int : maximum number of in-flight work items
+ * per_cpu RO bool : whether the workqueue is per-cpu or unbound
+ * max_active RW int : maximum number of in-flight work items
*
* Unbound workqueues have the following extra attributes.
*
- * nice RW int : nice value of the workers
- * cpumask RW mask : bitmask of allowed CPUs for the workers
+ * nice RW int : nice value of the workers
+ * cpumask RW mask : bitmask of allowed CPUs for the workers
+ * affinity_scope RW str : worker CPU affinity scope (cache, numa, none)
*/
struct wq_device {
struct workqueue_struct *wq;
@@ -5932,9 +5977,47 @@ out_unlock:
return ret ?: count;
}
+static ssize_t wq_affn_scope_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct workqueue_struct *wq = dev_to_wq(dev);
+ int written;
+
+ mutex_lock(&wq->mutex);
+ written = scnprintf(buf, PAGE_SIZE, "%s\n",
+ wq_affn_names[wq->unbound_attrs->affn_scope]);
+ mutex_unlock(&wq->mutex);
+
+ return written;
+}
+
+static ssize_t wq_affn_scope_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct workqueue_struct *wq = dev_to_wq(dev);
+ struct workqueue_attrs *attrs;
+ int affn, ret = -ENOMEM;
+
+ affn = parse_affn_scope(buf);
+ if (affn < 0)
+ return affn;
+
+ apply_wqattrs_lock();
+ attrs = wq_sysfs_prep_attrs(wq);
+ if (attrs) {
+ attrs->affn_scope = affn;
+ ret = apply_workqueue_attrs_locked(wq, attrs);
+ }
+ apply_wqattrs_unlock();
+ free_workqueue_attrs(attrs);
+ return ret ?: count;
+}
+
static struct device_attribute wq_sysfs_unbound_attrs[] = {
__ATTR(nice, 0644, wq_nice_show, wq_nice_store),
__ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store),
+ __ATTR(affinity_scope, 0644, wq_affn_scope_show, wq_affn_scope_store),
__ATTR_NULL,
};
@@ -6537,6 +6620,20 @@ static void __init init_pod_type(struct wq_pod_type *pt,
}
}
+static bool __init cpus_dont_share(int cpu0, int cpu1)
+{
+ return false;
+}
+
+static bool __init cpus_share_smt(int cpu0, int cpu1)
+{
+#ifdef CONFIG_SCHED_SMT
+ return cpumask_test_cpu(cpu0, cpu_smt_mask(cpu1));
+#else
+ return false;
+#endif
+}
+
static bool __init cpus_share_numa(int cpu0, int cpu1)
{
return cpu_to_node(cpu0) == cpu_to_node(cpu1);
@@ -6554,6 +6651,9 @@ void __init workqueue_init_topology(void)
struct workqueue_struct *wq;
int cpu;
+ init_pod_type(&wq_pod_types[WQ_AFFN_CPU], cpus_dont_share);
+ init_pod_type(&wq_pod_types[WQ_AFFN_SMT], cpus_share_smt);
+ init_pod_type(&wq_pod_types[WQ_AFFN_CACHE], cpus_share_cache);
init_pod_type(&wq_pod_types[WQ_AFFN_NUMA], cpus_share_numa);
mutex_lock(&wq_pool_mutex);
diff --git a/tools/workqueue/wq_dump.py b/tools/workqueue/wq_dump.py
index ddd0bb4395ea..43ab71a193b8 100644
--- a/tools/workqueue/wq_dump.py
+++ b/tools/workqueue/wq_dump.py
@@ -78,11 +78,16 @@ worker_pool_idr = prog['worker_pool_idr']
workqueues = prog['workqueues']
wq_unbound_cpumask = prog['wq_unbound_cpumask']
wq_pod_types = prog['wq_pod_types']
+wq_affn_dfl = prog['wq_affn_dfl']
+wq_affn_names = prog['wq_affn_names']
WQ_UNBOUND = prog['WQ_UNBOUND']
WQ_ORDERED = prog['__WQ_ORDERED']
WQ_MEM_RECLAIM = prog['WQ_MEM_RECLAIM']
+WQ_AFFN_CPU = prog['WQ_AFFN_CPU']
+WQ_AFFN_SMT = prog['WQ_AFFN_SMT']
+WQ_AFFN_CACHE = prog['WQ_AFFN_CACHE']
WQ_AFFN_NUMA = prog['WQ_AFFN_NUMA']
WQ_AFFN_SYSTEM = prog['WQ_AFFN_SYSTEM']
@@ -109,12 +114,10 @@ def print_pod_type(pt):
print(f' [{cpu}]={pt.cpu_pod[cpu].value_()}', end='')
print('')
-print('')
-print('NUMA')
-print_pod_type(wq_pod_types[WQ_AFFN_NUMA])
-print('')
-print('SYSTEM')
-print_pod_type(wq_pod_types[WQ_AFFN_SYSTEM])
+for affn in [WQ_AFFN_CPU, WQ_AFFN_SMT, WQ_AFFN_CACHE, WQ_AFFN_NUMA, WQ_AFFN_SYSTEM]:
+ print('')
+ print(f'{wq_affn_names[affn].string_().decode().upper()}{" (default)" if affn == wq_affn_dfl else ""}')
+ print_pod_type(wq_pod_types[affn])
print('')
print('Worker Pools')