summaryrefslogtreecommitdiffstats
path: root/kernel/watchdog_perf.c
diff options
context:
space:
mode:
authorSong Liu <song@kernel.org>2024-04-30 08:02:36 +0200
committerAndrew Morton <akpm@linux-foundation.org>2024-05-08 17:41:29 +0200
commit393fb313a2e150b768e4850658679e2afff431e9 (patch)
tree78dca4066c9e72b70b2eb4ce5dd6faccf3b42c93 /kernel/watchdog_perf.c
parentwatchdog: handle comma separated nmi_watchdog command line (diff)
downloadlinux-393fb313a2e150b768e4850658679e2afff431e9.tar.xz
linux-393fb313a2e150b768e4850658679e2afff431e9.zip
watchdog: allow nmi watchdog to use raw perf event
The NMI watchdog permanently consumes one hardware counter per CPU on the system. For systems that use many hardware counters, this causes more aggressive time multiplexing of perf events. OTOH, some CPUs (mostly Intel) support the "ref-cycles" event, which is rarely used. Add kernel cmdline arg nmi_watchdog=rNNN to configure the watchdog to use a raw event. For example, on Intel CPUs, we can use "r300" to configure the watchdog to use the ref-cycles event. If the raw event does not work, fall back to using "cycles". [akpm@linux-foundation.org: fix kerneldoc] Link: https://lkml.kernel.org/r/20240430060236.1878002-2-song@kernel.org Signed-off-by: Song Liu <song@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'kernel/watchdog_perf.c')
-rw-r--r--kernel/watchdog_perf.c46
1 files changed, 46 insertions, 0 deletions
diff --git a/kernel/watchdog_perf.c b/kernel/watchdog_perf.c
index 8ea00c4a24b2..5f7d1f0d4268 100644
--- a/kernel/watchdog_perf.c
+++ b/kernel/watchdog_perf.c
@@ -90,6 +90,14 @@ static struct perf_event_attr wd_hw_attr = {
.disabled = 1,
};
+static struct perf_event_attr fallback_wd_hw_attr = {
+	.size		= sizeof(fallback_wd_hw_attr),
+	.type		= PERF_TYPE_HARDWARE,
+	.config		= PERF_COUNT_HW_CPU_CYCLES,	/* event used for the retry */
+	.pinned		= 1,
+	.disabled	= 1,
+};
+
/* Callback function for perf event subsystem */
static void watchdog_overflow_callback(struct perf_event *event,
struct perf_sample_data *data,
@@ -123,6 +131,13 @@ static int hardlockup_detector_event_create(void)
evt = perf_event_create_kernel_counter(wd_attr, cpu, NULL,
watchdog_overflow_callback, NULL);
if (IS_ERR(evt)) {
+ wd_attr = &fallback_wd_hw_attr;
+ wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
+ evt = perf_event_create_kernel_counter(wd_attr, cpu, NULL,
+ watchdog_overflow_callback, NULL);
+ }
+
+ if (IS_ERR(evt)) {
pr_debug("Perf event create on CPU %d failed with %ld\n", cpu,
PTR_ERR(evt));
return PTR_ERR(evt);
@@ -259,3 +274,34 @@ int __init watchdog_hardlockup_probe(void)
}
return ret;
}
+
+/**
+ * hardlockup_config_perf_event - Overwrite config of wd_hw_attr.
+ * @str: hex number of the raw perf event to use, optionally followed by ",..."
+ *
+ * Switches wd_hw_attr to PERF_TYPE_RAW; on a parse error it is left unchanged.
+ */
+void __init hardlockup_config_perf_event(const char *str)
+{
+	u64 config;
+	char buf[24];
+	char *comma = strchr(str, ',');
+
+	if (!comma) {
+		if (kstrtoull(str, 16, &config))
+			return;
+	} else {
+		unsigned int len = comma - str;
+
+		if (len >= sizeof(buf))
+			return;
+		/* Copy just the number; text after the comma may be longer. */
+		memcpy(buf, str, len);
+		buf[len] = 0;
+		if (kstrtoull(buf, 16, &config))
+			return;
+	}
+
+	wd_hw_attr.type = PERF_TYPE_RAW;
+	wd_hw_attr.config = config;
+}