diff options
author | Tejun Heo <tj@kernel.org> | 2024-06-18 22:09:19 +0200 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2024-06-18 22:09:19 +0200 |
commit | 0922f54fdd15aedb93730eb8cfa0c069cbad4e08 (patch) | |
tree | a0a44695bd61bee22aea92532a5ace5f6ac75d50 /tools/sched_ext | |
parent | sched_ext: Add a central scheduler which makes all scheduling decisions on on... (diff) | |
download | linux-0922f54fdd15aedb93730eb8cfa0c069cbad4e08.tar.xz linux-0922f54fdd15aedb93730eb8cfa0c069cbad4e08.zip |
sched_ext: Make watchdog handle ops.dispatch() looping stall
The dispatch path retries if the local DSQ is still empty after
ops.dispatch() either dispatched or consumed a task. This is both out of
necessity and for convenience. It has to retry because the dispatch path
might lose the tasks to dequeue while the rq lock is released while trying
to migrate tasks across CPUs, and the retry mechanism makes ops.dispatch()
implementation easier as it only needs to make some forward progress each
iteration.
However, this makes it possible for ops.dispatch() to stall CPUs by
repeatedly dispatching ineligible tasks. If all CPUs are stalled that way,
the watchdog or sysrq handler can't run and the system can't be saved. Let's
address the issue by breaking out of the dispatch loop after 32 iterations.
It is unlikely but not impossible for ops.dispatch() to legitimately go over
the iteration limit. We want to come back to the dispatch path in such cases
as not doing so risks stalling the CPU by idling with runnable tasks
pending. As the previous task is still current in balance_scx(),
resched_curr() doesn't do anything - it will just get cleared. Let's instead
use scx_kick_bpf() which will trigger reschedule after switching to the next
task which will likely be the idle task.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: David Vernet <dvernet@meta.com>
Diffstat (limited to 'tools/sched_ext')
-rw-r--r-- | tools/sched_ext/scx_qmap.bpf.c | 15 | ||||
-rw-r--r-- | tools/sched_ext/scx_qmap.c | 8 |
2 files changed, 21 insertions, 2 deletions
diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c index 5b3da28bf042..879fc9c788e5 100644 --- a/tools/sched_ext/scx_qmap.bpf.c +++ b/tools/sched_ext/scx_qmap.bpf.c @@ -31,6 +31,7 @@ char _license[] SEC("license") = "GPL"; const volatile u64 slice_ns = SCX_SLICE_DFL; const volatile u32 stall_user_nth; const volatile u32 stall_kernel_nth; +const volatile u32 dsp_inf_loop_after; const volatile u32 dsp_batch; const volatile s32 disallow_tgid; const volatile bool suppress_dump; @@ -198,6 +199,20 @@ void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, struct task_struct *prev) if (scx_bpf_consume(SHARED_DSQ)) return; + if (dsp_inf_loop_after && nr_dispatched > dsp_inf_loop_after) { + /* + * PID 2 should be kthreadd which should mostly be idle and off + * the scheduler. Let's keep dispatching it to force the kernel + * to call this function over and over again. + */ + p = bpf_task_from_pid(2); + if (p) { + scx_bpf_dispatch(p, SCX_DSQ_LOCAL, slice_ns, 0); + bpf_task_release(p); + return; + } + } + if (!(cpuc = bpf_map_lookup_elem(&cpu_ctx_stor, &zero))) { scx_bpf_error("failed to look up cpu_ctx"); return; diff --git a/tools/sched_ext/scx_qmap.c b/tools/sched_ext/scx_qmap.c index a1123a17581b..594147a710a8 100644 --- a/tools/sched_ext/scx_qmap.c +++ b/tools/sched_ext/scx_qmap.c @@ -19,13 +19,14 @@ const char help_fmt[] = "\n" "See the top-level comment in .bpf.c for more details.\n" "\n" -"Usage: %s [-s SLICE_US] [-e COUNT] [-t COUNT] [-T COUNT] [-b COUNT]\n" +"Usage: %s [-s SLICE_US] [-e COUNT] [-t COUNT] [-T COUNT] [-l COUNT] [-b COUNT]\n" " [-d PID] [-D LEN] [-p] [-v]\n" "\n" " -s SLICE_US Override slice duration\n" " -e COUNT Trigger scx_bpf_error() after COUNT enqueues\n" " -t COUNT Stall every COUNT'th user thread\n" " -T COUNT Stall every COUNT'th kernel thread\n" +" -l COUNT Trigger dispatch infinite looping after COUNT dispatches\n" " -b COUNT Dispatch upto COUNT tasks together\n" " -d PID Disallow a process from switching into SCHED_EXT (-1 for self)\n" " -D LEN Set scx_exit_info.dump buffer length\n" @@ -61,7 +62,7 @@ int main(int argc, char **argv) skel = SCX_OPS_OPEN(qmap_ops, scx_qmap); - while ((opt = getopt(argc, argv, "s:e:t:T:b:d:D:Spvh")) != -1) { + while ((opt = getopt(argc, argv, "s:e:t:T:l:b:d:D:Spvh")) != -1) { switch (opt) { case 's': skel->rodata->slice_ns = strtoull(optarg, NULL, 0) * 1000; @@ -75,6 +76,9 @@ int main(int argc, char **argv) case 'T': skel->rodata->stall_kernel_nth = strtoul(optarg, NULL, 0); break; + case 'l': + skel->rodata->dsp_inf_loop_after = strtoul(optarg, NULL, 0); + break; case 'b': skel->rodata->dsp_batch = strtoul(optarg, NULL, 0); break; |