summary | refs | log | tree | commit | diff | stats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/kcsan/atomic.h 21
-rw-r--r-- kernel/kcsan/core.c 183
-rw-r--r-- kernel/kcsan/debugfs.c 47
-rw-r--r-- kernel/kcsan/kcsan.h 8
-rw-r--r-- kernel/kcsan/report.c 455
5 files changed, 465 insertions, 249 deletions
diff --git a/kernel/kcsan/atomic.h b/kernel/kcsan/atomic.h
index a9c193053491..be9e625227f3 100644
--- a/kernel/kcsan/atomic.h
+++ b/kernel/kcsan/atomic.h
@@ -4,24 +4,17 @@
#define _KERNEL_KCSAN_ATOMIC_H
#include <linux/jiffies.h>
+#include <linux/sched.h>
/*
- * Helper that returns true if access to @ptr should be considered an atomic
- * access, even though it is not explicitly atomic.
- *
- * List all volatile globals that have been observed in races, to suppress
- * data race reports between accesses to these variables.
- *
- * For now, we assume that volatile accesses of globals are as strong as atomic
- * accesses (READ_ONCE, WRITE_ONCE cast to volatile). The situation is still not
- * entirely clear, as on some architectures (Alpha) READ_ONCE/WRITE_ONCE do more
- * than cast to volatile. Eventually, we hope to be able to remove this
- * function.
+ * Special rules for certain memory where concurrent conflicting accesses are
+ * common but, by current convention, are not marked; returns true if access
+ * to @ptr should be considered atomic. Called from the slow-path.
*/
-static __always_inline bool kcsan_is_atomic(const volatile void *ptr)
+static bool kcsan_is_atomic_special(const volatile void *ptr)
{
- /* only jiffies for now */
- return ptr == &jiffies;
+ /* volatile globals that have been observed in data races. */
+ return ptr == &jiffies || ptr == &current->state;
}
#endif /* _KERNEL_KCSAN_ATOMIC_H */
diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c
index 589b1e7f0f25..a73a66cf79df 100644
--- a/kernel/kcsan/core.c
+++ b/kernel/kcsan/core.c
@@ -6,6 +6,7 @@
#include <linux/export.h>
#include <linux/init.h>
#include <linux/kernel.h>
+#include <linux/list.h>
#include <linux/moduleparam.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
@@ -18,9 +19,10 @@
#include "kcsan.h"
static bool kcsan_early_enable = IS_ENABLED(CONFIG_KCSAN_EARLY_ENABLE);
-static unsigned int kcsan_udelay_task = CONFIG_KCSAN_UDELAY_TASK;
-static unsigned int kcsan_udelay_interrupt = CONFIG_KCSAN_UDELAY_INTERRUPT;
+unsigned int kcsan_udelay_task = CONFIG_KCSAN_UDELAY_TASK;
+unsigned int kcsan_udelay_interrupt = CONFIG_KCSAN_UDELAY_INTERRUPT;
static long kcsan_skip_watch = CONFIG_KCSAN_SKIP_WATCH;
+static bool kcsan_interrupt_watcher = IS_ENABLED(CONFIG_KCSAN_INTERRUPT_WATCHER);
#ifdef MODULE_PARAM_PREFIX
#undef MODULE_PARAM_PREFIX
@@ -30,6 +32,7 @@ module_param_named(early_enable, kcsan_early_enable, bool, 0);
module_param_named(udelay_task, kcsan_udelay_task, uint, 0644);
module_param_named(udelay_interrupt, kcsan_udelay_interrupt, uint, 0644);
module_param_named(skip_watch, kcsan_skip_watch, long, 0644);
+module_param_named(interrupt_watcher, kcsan_interrupt_watcher, bool, 0444);
bool kcsan_enabled;
@@ -40,10 +43,11 @@ static DEFINE_PER_CPU(struct kcsan_ctx, kcsan_cpu_ctx) = {
.atomic_nest_count = 0,
.in_flat_atomic = false,
.access_mask = 0,
+ .scoped_accesses = {LIST_POISON1, NULL},
};
/*
- * Helper macros to index into adjacent slots slots, starting from address slot
+ * Helper macros to index into adjacent slots, starting from address slot
* itself, followed by the right and left slots.
*
* The purpose is 2-fold:
@@ -67,7 +71,6 @@ static DEFINE_PER_CPU(struct kcsan_ctx, kcsan_cpu_ctx) = {
* slot=9: [10, 11, 9]
* slot=63: [64, 65, 63]
*/
-#define NUM_SLOTS (1 + 2*KCSAN_CHECK_ADJACENT)
#define SLOT_IDX(slot, i) (slot + ((i + KCSAN_CHECK_ADJACENT) % NUM_SLOTS))
/*
@@ -169,12 +172,16 @@ try_consume_watchpoint(atomic_long_t *watchpoint, long encoded_watchpoint)
return atomic_long_try_cmpxchg_relaxed(watchpoint, &encoded_watchpoint, CONSUMED_WATCHPOINT);
}
-/*
- * Return true if watchpoint was not touched, false if consumed.
- */
-static inline bool remove_watchpoint(atomic_long_t *watchpoint)
+/* Return true if watchpoint was not touched, false if already consumed. */
+static inline bool consume_watchpoint(atomic_long_t *watchpoint)
+{
+ return atomic_long_xchg_relaxed(watchpoint, CONSUMED_WATCHPOINT) != CONSUMED_WATCHPOINT;
+}
+
+/* Remove the watchpoint -- its slot may be reused after. */
+static inline void remove_watchpoint(atomic_long_t *watchpoint)
{
- return atomic_long_xchg_relaxed(watchpoint, INVALID_WATCHPOINT) != CONSUMED_WATCHPOINT;
+ atomic_long_set(watchpoint, INVALID_WATCHPOINT);
}
static __always_inline struct kcsan_ctx *get_ctx(void)
@@ -186,12 +193,24 @@ static __always_inline struct kcsan_ctx *get_ctx(void)
return in_task() ? &current->kcsan_ctx : raw_cpu_ptr(&kcsan_cpu_ctx);
}
-static __always_inline bool
-is_atomic(const volatile void *ptr, size_t size, int type)
+/* Check scoped accesses; never inline because this is a slow-path! */
+static noinline void kcsan_check_scoped_accesses(void)
{
- struct kcsan_ctx *ctx;
+ struct kcsan_ctx *ctx = get_ctx();
+ struct list_head *prev_save = ctx->scoped_accesses.prev;
+ struct kcsan_scoped_access *scoped_access;
+
+ ctx->scoped_accesses.prev = NULL; /* Avoid recursion. */
+ list_for_each_entry(scoped_access, &ctx->scoped_accesses, list)
+ __kcsan_check_access(scoped_access->ptr, scoped_access->size, scoped_access->type);
+ ctx->scoped_accesses.prev = prev_save;
+}
- if ((type & KCSAN_ACCESS_ATOMIC) != 0)
+/* Rules for generic atomic accesses. Called from fast-path. */
+static __always_inline bool
+is_atomic(const volatile void *ptr, size_t size, int type, struct kcsan_ctx *ctx)
+{
+ if (type & KCSAN_ACCESS_ATOMIC)
return true;
/*
@@ -199,16 +218,15 @@ is_atomic(const volatile void *ptr, size_t size, int type)
* as atomic. This allows using them also in atomic regions, such as
* seqlocks, without implicitly changing their semantics.
*/
- if ((type & KCSAN_ACCESS_ASSERT) != 0)
+ if (type & KCSAN_ACCESS_ASSERT)
return false;
if (IS_ENABLED(CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC) &&
- (type & KCSAN_ACCESS_WRITE) != 0 && size <= sizeof(long) &&
+ (type & KCSAN_ACCESS_WRITE) && size <= sizeof(long) &&
IS_ALIGNED((unsigned long)ptr, size))
return true; /* Assume aligned writes up to word size are atomic. */
- ctx = get_ctx();
- if (unlikely(ctx->atomic_next > 0)) {
+ if (ctx->atomic_next > 0) {
/*
* Because we do not have separate contexts for nested
* interrupts, in case atomic_next is set, we simply assume that
@@ -222,14 +240,12 @@ is_atomic(const volatile void *ptr, size_t size, int type)
--ctx->atomic_next; /* in task, or outer interrupt */
return true;
}
- if (unlikely(ctx->atomic_nest_count > 0 || ctx->in_flat_atomic))
- return true;
- return kcsan_is_atomic(ptr);
+ return ctx->atomic_nest_count > 0 || ctx->in_flat_atomic;
}
static __always_inline bool
-should_watch(const volatile void *ptr, size_t size, int type)
+should_watch(const volatile void *ptr, size_t size, int type, struct kcsan_ctx *ctx)
{
/*
* Never set up watchpoints when memory operations are atomic.
@@ -238,7 +254,7 @@ should_watch(const volatile void *ptr, size_t size, int type)
* should not count towards skipped instructions, and (2) to actually
* decrement kcsan_atomic_next for consecutive instruction stream.
*/
- if (is_atomic(ptr, size, type))
+ if (is_atomic(ptr, size, type, ctx))
return false;
if (this_cpu_dec_return(kcsan_skip) >= 0)
@@ -320,8 +336,9 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr,
flags = user_access_save();
if (consumed) {
- kcsan_report(ptr, size, type, true, raw_smp_processor_id(),
- KCSAN_REPORT_CONSUMED_WATCHPOINT);
+ kcsan_report(ptr, size, type, KCSAN_VALUE_CHANGE_MAYBE,
+ KCSAN_REPORT_CONSUMED_WATCHPOINT,
+ watchpoint - watchpoints);
} else {
/*
* The other thread may not print any diagnostics, as it has
@@ -354,7 +371,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
unsigned long access_mask;
enum kcsan_value_change value_change = KCSAN_VALUE_CHANGE_MAYBE;
unsigned long ua_flags = user_access_save();
- unsigned long irq_flags;
+ unsigned long irq_flags = 0;
/*
* Always reset kcsan_skip counter in slow-path to avoid underflow; see
@@ -365,31 +382,23 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
if (!kcsan_is_enabled())
goto out;
+ /*
+ * Special atomic rules: unlikely to be true, so we check them here in
+ * the slow-path, and not in the fast-path in is_atomic(). Call after
+ * kcsan_is_enabled(), as we may access memory that is not yet
+ * initialized during early boot.
+ */
+ if (!is_assert && kcsan_is_atomic_special(ptr))
+ goto out;
+
if (!check_encodable((unsigned long)ptr, size)) {
kcsan_counter_inc(KCSAN_COUNTER_UNENCODABLE_ACCESSES);
goto out;
}
- /*
- * Disable interrupts & preemptions to avoid another thread on the same
- * CPU accessing memory locations for the set up watchpoint; this is to
- * avoid reporting races to e.g. CPU-local data.
- *
- * An alternative would be adding the source CPU to the watchpoint
- * encoding, and checking that watchpoint-CPU != this-CPU. There are
- * several problems with this:
- * 1. we should avoid stealing more bits from the watchpoint encoding
- * as it would affect accuracy, as well as increase performance
- * overhead in the fast-path;
- * 2. if we are preempted, but there *is* a genuine data race, we
- * would *not* report it -- since this is the common case (vs.
- * CPU-local data accesses), it makes more sense (from a data race
- * detection point of view) to simply disable preemptions to ensure
- * as many tasks as possible run on other CPUs.
- *
- * Use raw versions, to avoid lockdep recursion via IRQ flags tracing.
- */
- raw_local_irq_save(irq_flags);
+ if (!kcsan_interrupt_watcher)
+ /* Use raw to avoid lockdep recursion via IRQ flags tracing. */
+ raw_local_irq_save(irq_flags);
watchpoint = insert_watchpoint((unsigned long)ptr, size, is_write);
if (watchpoint == NULL) {
@@ -477,7 +486,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
value_change = KCSAN_VALUE_CHANGE_TRUE;
/* Check if this access raced with another. */
- if (!remove_watchpoint(watchpoint)) {
+ if (!consume_watchpoint(watchpoint)) {
/*
* Depending on the access type, map a value_change of MAYBE to
* TRUE (always report) or FALSE (never report).
@@ -507,8 +516,8 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
if (is_assert && value_change == KCSAN_VALUE_CHANGE_TRUE)
kcsan_counter_inc(KCSAN_COUNTER_ASSERT_FAILURES);
- kcsan_report(ptr, size, type, value_change, smp_processor_id(),
- KCSAN_REPORT_RACE_SIGNAL);
+ kcsan_report(ptr, size, type, value_change, KCSAN_REPORT_RACE_SIGNAL,
+ watchpoint - watchpoints);
} else if (value_change == KCSAN_VALUE_CHANGE_TRUE) {
/* Inferring a race, since the value should not have changed. */
@@ -518,13 +527,19 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
if (IS_ENABLED(CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN) || is_assert)
kcsan_report(ptr, size, type, KCSAN_VALUE_CHANGE_TRUE,
- smp_processor_id(),
- KCSAN_REPORT_RACE_UNKNOWN_ORIGIN);
+ KCSAN_REPORT_RACE_UNKNOWN_ORIGIN,
+ watchpoint - watchpoints);
}
+ /*
+ * Remove watchpoint; must be after reporting, since the slot may be
+ * reused after this point.
+ */
+ remove_watchpoint(watchpoint);
kcsan_counter_dec(KCSAN_COUNTER_USED_WATCHPOINTS);
out_unlock:
- raw_local_irq_restore(irq_flags);
+ if (!kcsan_interrupt_watcher)
+ raw_local_irq_restore(irq_flags);
out:
user_access_restore(ua_flags);
}
@@ -560,8 +575,14 @@ static __always_inline void check_access(const volatile void *ptr, size_t size,
if (unlikely(watchpoint != NULL))
kcsan_found_watchpoint(ptr, size, type, watchpoint,
encoded_watchpoint);
- else if (unlikely(should_watch(ptr, size, type)))
- kcsan_setup_watchpoint(ptr, size, type);
+ else {
+ struct kcsan_ctx *ctx = get_ctx(); /* Call only once in fast-path. */
+
+ if (unlikely(should_watch(ptr, size, type, ctx)))
+ kcsan_setup_watchpoint(ptr, size, type);
+ else if (unlikely(ctx->scoped_accesses.prev))
+ kcsan_check_scoped_accesses();
+ }
}
/* === Public interface ===================================================== */
@@ -604,6 +625,13 @@ void kcsan_enable_current(void)
}
EXPORT_SYMBOL(kcsan_enable_current);
+void kcsan_enable_current_nowarn(void)
+{
+ if (get_ctx()->disable_count-- == 0)
+ kcsan_disable_current();
+}
+EXPORT_SYMBOL(kcsan_enable_current_nowarn);
+
void kcsan_nestable_atomic_begin(void)
{
/*
@@ -657,6 +685,55 @@ void kcsan_set_access_mask(unsigned long mask)
}
EXPORT_SYMBOL(kcsan_set_access_mask);
+struct kcsan_scoped_access *
+kcsan_begin_scoped_access(const volatile void *ptr, size_t size, int type,
+ struct kcsan_scoped_access *sa)
+{
+ struct kcsan_ctx *ctx = get_ctx();
+
+ __kcsan_check_access(ptr, size, type);
+
+ ctx->disable_count++; /* Disable KCSAN, in case list debugging is on. */
+
+ INIT_LIST_HEAD(&sa->list);
+ sa->ptr = ptr;
+ sa->size = size;
+ sa->type = type;
+
+ if (!ctx->scoped_accesses.prev) /* Lazy initialize list head. */
+ INIT_LIST_HEAD(&ctx->scoped_accesses);
+ list_add(&sa->list, &ctx->scoped_accesses);
+
+ ctx->disable_count--;
+ return sa;
+}
+EXPORT_SYMBOL(kcsan_begin_scoped_access);
+
+void kcsan_end_scoped_access(struct kcsan_scoped_access *sa)
+{
+ struct kcsan_ctx *ctx = get_ctx();
+
+ if (WARN(!ctx->scoped_accesses.prev, "Unbalanced %s()?", __func__))
+ return;
+
+ ctx->disable_count++; /* Disable KCSAN, in case list debugging is on. */
+
+ list_del(&sa->list);
+ if (list_empty(&ctx->scoped_accesses))
+ /*
+ * Ensure we do not enter kcsan_check_scoped_accesses()
+ * slow-path if unnecessary, and avoid requiring list_empty()
+ * in the fast-path (to avoid a READ_ONCE() and potential
+ * uaccess warning).
+ */
+ ctx->scoped_accesses.prev = NULL;
+
+ ctx->disable_count--;
+
+ __kcsan_check_access(sa->ptr, sa->size, sa->type);
+}
+EXPORT_SYMBOL(kcsan_end_scoped_access);
+
void __kcsan_check_access(const volatile void *ptr, size_t size, int type)
{
check_access(ptr, size, type);
diff --git a/kernel/kcsan/debugfs.c b/kernel/kcsan/debugfs.c
index 2ff196123977..023e49c58d55 100644
--- a/kernel/kcsan/debugfs.c
+++ b/kernel/kcsan/debugfs.c
@@ -74,25 +74,34 @@ void kcsan_counter_dec(enum kcsan_counter_id id)
*/
static noinline void microbenchmark(unsigned long iters)
{
+ const struct kcsan_ctx ctx_save = current->kcsan_ctx;
+ const bool was_enabled = READ_ONCE(kcsan_enabled);
cycles_t cycles;
+ /* We may have been called from an atomic region; reset context. */
+ memset(&current->kcsan_ctx, 0, sizeof(current->kcsan_ctx));
+ /*
+ * Disable to benchmark fast-path for all accesses, and (expected
+ * negligible) call into slow-path, but never set up watchpoints.
+ */
+ WRITE_ONCE(kcsan_enabled, false);
+
pr_info("KCSAN: %s begin | iters: %lu\n", __func__, iters);
cycles = get_cycles();
while (iters--) {
- /*
- * We can run this benchmark from multiple tasks; this address
- * calculation increases likelyhood of some accesses
- * overlapping. Make the access type an atomic read, to never
- * set up watchpoints and test the fast-path only.
- */
- unsigned long addr =
- iters % (CONFIG_KCSAN_NUM_WATCHPOINTS * PAGE_SIZE);
- __kcsan_check_access((void *)addr, sizeof(long), KCSAN_ACCESS_ATOMIC);
+ unsigned long addr = iters & ((PAGE_SIZE << 8) - 1);
+ int type = !(iters & 0x7f) ? KCSAN_ACCESS_ATOMIC :
+ (!(iters & 0xf) ? KCSAN_ACCESS_WRITE : 0);
+ __kcsan_check_access((void *)addr, sizeof(long), type);
}
cycles = get_cycles() - cycles;
pr_info("KCSAN: %s end | cycles: %llu\n", __func__, cycles);
+
+ WRITE_ONCE(kcsan_enabled, was_enabled);
+ /* restore context */
+ current->kcsan_ctx = ctx_save;
}
/*
@@ -101,6 +110,7 @@ static noinline void microbenchmark(unsigned long iters)
*/
static long test_dummy;
static long test_flags;
+static long test_scoped;
static noinline void test_thread(unsigned long iters)
{
const long CHANGE_BITS = 0xff00ff00ff00ff00L;
@@ -111,7 +121,8 @@ static noinline void test_thread(unsigned long iters)
memset(&current->kcsan_ctx, 0, sizeof(current->kcsan_ctx));
pr_info("KCSAN: %s begin | iters: %lu\n", __func__, iters);
- pr_info("test_dummy@%px, test_flags@%px\n", &test_dummy, &test_flags);
+ pr_info("test_dummy@%px, test_flags@%px, test_scoped@%px,\n",
+ &test_dummy, &test_flags, &test_scoped);
cycles = get_cycles();
while (iters--) {
@@ -132,6 +143,18 @@ static noinline void test_thread(unsigned long iters)
test_flags ^= CHANGE_BITS; /* generate value-change */
__kcsan_check_write(&test_flags, sizeof(test_flags));
+
+ BUG_ON(current->kcsan_ctx.scoped_accesses.prev);
+ {
+ /* Should generate reports anywhere in this block. */
+ ASSERT_EXCLUSIVE_WRITER_SCOPED(test_scoped);
+ ASSERT_EXCLUSIVE_ACCESS_SCOPED(test_scoped);
+ BUG_ON(!current->kcsan_ctx.scoped_accesses.prev);
+ /* Unrelated accesses. */
+ __kcsan_check_access(&cycles, sizeof(cycles), 0);
+ __kcsan_check_access(&cycles, sizeof(cycles), KCSAN_ACCESS_ATOMIC);
+ }
+ BUG_ON(current->kcsan_ctx.scoped_accesses.prev);
}
cycles = get_cycles() - cycles;
@@ -207,7 +230,7 @@ static ssize_t insert_report_filterlist(const char *func)
/* initial allocation */
report_filterlist.addrs =
kmalloc_array(report_filterlist.size,
- sizeof(unsigned long), GFP_KERNEL);
+ sizeof(unsigned long), GFP_ATOMIC);
if (report_filterlist.addrs == NULL) {
ret = -ENOMEM;
goto out;
@@ -217,7 +240,7 @@ static ssize_t insert_report_filterlist(const char *func)
size_t new_size = report_filterlist.size * 2;
unsigned long *new_addrs =
krealloc(report_filterlist.addrs,
- new_size * sizeof(unsigned long), GFP_KERNEL);
+ new_size * sizeof(unsigned long), GFP_ATOMIC);
if (new_addrs == NULL) {
/* leave filterlist itself untouched */
diff --git a/kernel/kcsan/kcsan.h b/kernel/kcsan/kcsan.h
index 892de5120c1b..763d6d08d94b 100644
--- a/kernel/kcsan/kcsan.h
+++ b/kernel/kcsan/kcsan.h
@@ -12,6 +12,10 @@
/* The number of adjacent watchpoints to check. */
#define KCSAN_CHECK_ADJACENT 1
+#define NUM_SLOTS (1 + 2*KCSAN_CHECK_ADJACENT)
+
+extern unsigned int kcsan_udelay_task;
+extern unsigned int kcsan_udelay_interrupt;
/*
* Globally enable and disable KCSAN.
@@ -132,7 +136,7 @@ enum kcsan_report_type {
* Print a race report from thread that encountered the race.
*/
extern void kcsan_report(const volatile void *ptr, size_t size, int access_type,
- enum kcsan_value_change value_change, int cpu_id,
- enum kcsan_report_type type);
+ enum kcsan_value_change value_change,
+ enum kcsan_report_type type, int watchpoint_idx);
#endif /* _KERNEL_KCSAN_KCSAN_H */
diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c
index 11c791b886f3..ac5f8345bae9 100644
--- a/kernel/kcsan/report.c
+++ b/kernel/kcsan/report.c
@@ -1,5 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/debug_locks.h>
+#include <linux/delay.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/lockdep.h>
@@ -17,21 +19,49 @@
*/
#define NUM_STACK_ENTRIES 64
-/*
- * Other thread info: communicated from other racing thread to thread that set
- * up the watchpoint, which then prints the complete report atomically. Only
- * need one struct, as all threads should to be serialized regardless to print
- * the reports, with reporting being in the slow-path.
- */
-static struct {
+/* Common access info. */
+struct access_info {
const volatile void *ptr;
size_t size;
int access_type;
int task_pid;
int cpu_id;
+};
+
+/*
+ * Other thread info: communicated from other racing thread to thread that set
+ * up the watchpoint, which then prints the complete report atomically.
+ */
+struct other_info {
+ struct access_info ai;
unsigned long stack_entries[NUM_STACK_ENTRIES];
int num_stack_entries;
-} other_info = { .ptr = NULL };
+
+ /*
+ * Optionally pass @current. Typically we do not need to pass @current
+ * via @other_info since just @task_pid is sufficient. Passing @current
+ * has additional overhead.
+ *
+ * To safely pass @current, we must either use get_task_struct/
+ * put_task_struct, or stall the thread that populated @other_info.
+ *
+ * We cannot rely on get_task_struct/put_task_struct in case
+ * release_report() races with a task being released, and would have to
+ * free it in release_report(). This may result in deadlock if we want
+ * to use KCSAN on the allocators.
+ *
+ * Since we also want to reliably print held locks for
+ * CONFIG_KCSAN_VERBOSE, the current implementation stalls the thread
+ * that populated @other_info until it has been consumed.
+ */
+ struct task_struct *task;
+};
+
+/*
+ * To never block any producers of struct other_info, we need as many elements
+ * as we have watchpoints (upper bound on concurrent races to report).
+ */
+static struct other_info other_infos[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS-1];
/*
* Information about reported races; used to rate limit reporting.
@@ -68,10 +98,11 @@ struct report_time {
static struct report_time report_times[REPORT_TIMES_SIZE];
/*
- * This spinlock protects reporting and other_info, since other_info is usually
- * required when reporting.
+ * Spinlock serializing report generation, and access to @other_infos. Although
+ * it could make sense to have a finer-grained locking story for @other_infos,
+ * report generation needs to be serialized either way, so not much is gained.
*/
-static DEFINE_SPINLOCK(report_lock);
+static DEFINE_RAW_SPINLOCK(report_lock);
/*
* Checks if the race identified by thread frames frame1 and frame2 has
@@ -161,11 +192,11 @@ skip_report(enum kcsan_value_change value_change, unsigned long top_frame)
* maintainers.
*/
char buf[64];
+ int len = scnprintf(buf, sizeof(buf), "%ps", (void *)top_frame);
- snprintf(buf, sizeof(buf), "%ps", (void *)top_frame);
- if (!strnstr(buf, "rcu_", sizeof(buf)) &&
- !strnstr(buf, "_rcu", sizeof(buf)) &&
- !strnstr(buf, "_srcu", sizeof(buf)))
+ if (!strnstr(buf, "rcu_", len) &&
+ !strnstr(buf, "_rcu", len) &&
+ !strnstr(buf, "_srcu", len))
return true;
}
@@ -174,6 +205,20 @@ skip_report(enum kcsan_value_change value_change, unsigned long top_frame)
static const char *get_access_type(int type)
{
+ if (type & KCSAN_ACCESS_ASSERT) {
+ if (type & KCSAN_ACCESS_SCOPED) {
+ if (type & KCSAN_ACCESS_WRITE)
+ return "assert no accesses (scoped)";
+ else
+ return "assert no writes (scoped)";
+ } else {
+ if (type & KCSAN_ACCESS_WRITE)
+ return "assert no accesses";
+ else
+ return "assert no writes";
+ }
+ }
+
switch (type) {
case 0:
return "read";
@@ -183,17 +228,14 @@ static const char *get_access_type(int type)
return "write";
case KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC:
return "write (marked)";
-
- /*
- * ASSERT variants:
- */
- case KCSAN_ACCESS_ASSERT:
- case KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_ATOMIC:
- return "assert no writes";
- case KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE:
- case KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC:
- return "assert no accesses";
-
+ case KCSAN_ACCESS_SCOPED:
+ return "read (scoped)";
+ case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_ATOMIC:
+ return "read (marked, scoped)";
+ case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE:
+ return "write (scoped)";
+ case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC:
+ return "write (marked, scoped)";
default:
BUG();
}
@@ -217,19 +259,35 @@ static const char *get_thread_desc(int task_id)
}
/* Helper to skip KCSAN-related functions in stack-trace. */
-static int get_stack_skipnr(unsigned long stack_entries[], int num_entries)
+static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries)
{
char buf[64];
- int skip = 0;
-
- for (; skip < num_entries; ++skip) {
- snprintf(buf, sizeof(buf), "%ps", (void *)stack_entries[skip]);
- if (!strnstr(buf, "csan_", sizeof(buf)) &&
- !strnstr(buf, "tsan_", sizeof(buf)) &&
- !strnstr(buf, "_once_size", sizeof(buf))) {
- break;
+ char *cur;
+ int len, skip;
+
+ for (skip = 0; skip < num_entries; ++skip) {
+ len = scnprintf(buf, sizeof(buf), "%ps", (void *)stack_entries[skip]);
+
+ /* Never show tsan_* or {read,write}_once_size. */
+ if (strnstr(buf, "tsan_", len) ||
+ strnstr(buf, "_once_size", len))
+ continue;
+
+ cur = strnstr(buf, "kcsan_", len);
+ if (cur) {
+ cur += sizeof("kcsan_") - 1;
+ if (strncmp(cur, "test", sizeof("test") - 1))
+ continue; /* KCSAN runtime function. */
+ /* KCSAN related test. */
}
+
+ /*
+ * No match for runtime functions -- @skip entries to skip to
+ * get to first frame of interest.
+ */
+ break;
}
+
return skip;
}
@@ -245,12 +303,23 @@ static int sym_strcmp(void *addr1, void *addr2)
return strncmp(buf1, buf2, sizeof(buf1));
}
+static void print_verbose_info(struct task_struct *task)
+{
+ if (!task)
+ return;
+
+ pr_err("\n");
+ debug_show_held_locks(task);
+ print_irqtrace_events(task);
+}
+
/*
* Returns true if a report was generated, false otherwise.
*/
-static bool print_report(const volatile void *ptr, size_t size, int access_type,
- enum kcsan_value_change value_change, int cpu_id,
- enum kcsan_report_type type)
+static bool print_report(enum kcsan_value_change value_change,
+ enum kcsan_report_type type,
+ const struct access_info *ai,
+ const struct other_info *other_info)
{
unsigned long stack_entries[NUM_STACK_ENTRIES] = { 0 };
int num_stack_entries = stack_trace_save(stack_entries, NUM_STACK_ENTRIES, 1);
@@ -266,9 +335,9 @@ static bool print_report(const volatile void *ptr, size_t size, int access_type,
return false;
if (type == KCSAN_REPORT_RACE_SIGNAL) {
- other_skipnr = get_stack_skipnr(other_info.stack_entries,
- other_info.num_stack_entries);
- other_frame = other_info.stack_entries[other_skipnr];
+ other_skipnr = get_stack_skipnr(other_info->stack_entries,
+ other_info->num_stack_entries);
+ other_frame = other_info->stack_entries[other_skipnr];
/* @value_change is only known for the other thread */
if (skip_report(value_change, other_frame))
@@ -290,13 +359,13 @@ static bool print_report(const volatile void *ptr, size_t size, int access_type,
*/
cmp = sym_strcmp((void *)other_frame, (void *)this_frame);
pr_err("BUG: KCSAN: %s in %ps / %ps\n",
- get_bug_type(access_type | other_info.access_type),
+ get_bug_type(ai->access_type | other_info->ai.access_type),
(void *)(cmp < 0 ? other_frame : this_frame),
(void *)(cmp < 0 ? this_frame : other_frame));
} break;
case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN:
- pr_err("BUG: KCSAN: %s in %pS\n", get_bug_type(access_type),
+ pr_err("BUG: KCSAN: %s in %pS\n", get_bug_type(ai->access_type),
(void *)this_frame);
break;
@@ -310,27 +379,28 @@ static bool print_report(const volatile void *ptr, size_t size, int access_type,
switch (type) {
case KCSAN_REPORT_RACE_SIGNAL:
pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n",
- get_access_type(other_info.access_type), other_info.ptr,
- other_info.size, get_thread_desc(other_info.task_pid),
- other_info.cpu_id);
+ get_access_type(other_info->ai.access_type), other_info->ai.ptr,
+ other_info->ai.size, get_thread_desc(other_info->ai.task_pid),
+ other_info->ai.cpu_id);
/* Print the other thread's stack trace. */
- stack_trace_print(other_info.stack_entries + other_skipnr,
- other_info.num_stack_entries - other_skipnr,
+ stack_trace_print(other_info->stack_entries + other_skipnr,
+ other_info->num_stack_entries - other_skipnr,
0);
+ if (IS_ENABLED(CONFIG_KCSAN_VERBOSE))
+ print_verbose_info(other_info->task);
+
pr_err("\n");
pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n",
- get_access_type(access_type), ptr, size,
- get_thread_desc(in_task() ? task_pid_nr(current) : -1),
- cpu_id);
+ get_access_type(ai->access_type), ai->ptr, ai->size,
+ get_thread_desc(ai->task_pid), ai->cpu_id);
break;
case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN:
pr_err("race at unknown origin, with %s to 0x%px of %zu bytes by %s on cpu %i:\n",
- get_access_type(access_type), ptr, size,
- get_thread_desc(in_task() ? task_pid_nr(current) : -1),
- cpu_id);
+ get_access_type(ai->access_type), ai->ptr, ai->size,
+ get_thread_desc(ai->task_pid), ai->cpu_id);
break;
default:
@@ -340,6 +410,9 @@ static bool print_report(const volatile void *ptr, size_t size, int access_type,
stack_trace_print(stack_entries + skipnr, num_stack_entries - skipnr,
0);
+ if (IS_ENABLED(CONFIG_KCSAN_VERBOSE))
+ print_verbose_info(current);
+
/* Print report footer. */
pr_err("\n");
pr_err("Reported by Kernel Concurrency Sanitizer on:\n");
@@ -349,142 +422,188 @@ static bool print_report(const volatile void *ptr, size_t size, int access_type,
return true;
}
-static void release_report(unsigned long *flags, enum kcsan_report_type type)
+static void release_report(unsigned long *flags, struct other_info *other_info)
{
- if (type == KCSAN_REPORT_RACE_SIGNAL)
- other_info.ptr = NULL; /* mark for reuse */
+ if (other_info)
+ /*
+ * Use size to denote valid/invalid, since KCSAN entirely
+ * ignores 0-sized accesses.
+ */
+ other_info->ai.size = 0;
- spin_unlock_irqrestore(&report_lock, *flags);
+ raw_spin_unlock_irqrestore(&report_lock, *flags);
}
/*
- * Depending on the report type either sets other_info and returns false, or
- * acquires the matching other_info and returns true. If other_info is not
- * required for the report type, simply acquires report_lock and returns true.
+ * Sets @other_info->task and awaits consumption of @other_info.
+ *
+ * Precondition: report_lock is held.
+ * Postcondition: report_lock is held.
*/
-static bool prepare_report(unsigned long *flags, const volatile void *ptr,
- size_t size, int access_type, int cpu_id,
- enum kcsan_report_type type)
+static void set_other_info_task_blocking(unsigned long *flags,
+ const struct access_info *ai,
+ struct other_info *other_info)
{
- if (type != KCSAN_REPORT_CONSUMED_WATCHPOINT &&
- type != KCSAN_REPORT_RACE_SIGNAL) {
- /* other_info not required; just acquire report_lock */
- spin_lock_irqsave(&report_lock, *flags);
- return true;
- }
+ /*
+ * We may be instrumenting a code-path where current->state is already
+ * something other than TASK_RUNNING.
+ */
+ const bool is_running = current->state == TASK_RUNNING;
+ /*
+ * To avoid deadlock in case we are in an interrupt here and this is a
+ * race with a task on the same CPU (KCSAN_INTERRUPT_WATCHER), provide a
+ * timeout to ensure this works in all contexts.
+ *
+ * Await approximately the worst case delay of the reporting thread (if
+ * we are not interrupted).
+ */
+ int timeout = max(kcsan_udelay_task, kcsan_udelay_interrupt);
+
+ other_info->task = current;
+ do {
+ if (is_running) {
+ /*
+ * Let lockdep know the real task is sleeping, to print
+ * the held locks (recall we turned lockdep off, so
+ * locking/unlocking @report_lock won't be recorded).
+ */
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ }
+ raw_spin_unlock_irqrestore(&report_lock, *flags);
+ /*
+ * We cannot call schedule() since we also cannot reliably
+ * determine if sleeping here is permitted -- see in_atomic().
+ */
-retry:
- spin_lock_irqsave(&report_lock, *flags);
+ udelay(1);
+ raw_spin_lock_irqsave(&report_lock, *flags);
+ if (timeout-- < 0) {
+ /*
+ * Abort. Reset @other_info->task to NULL, since it
+ * appears the other thread is still going to consume
+ * it. It will result in no verbose info printed for
+ * this task.
+ */
+ other_info->task = NULL;
+ break;
+ }
+ /*
+ * If invalid, or @ptr or @current no longer matches, then
+ * @other_info has been consumed and we may continue. If not, retry.
+ */
+ } while (other_info->ai.size && other_info->ai.ptr == ai->ptr &&
+ other_info->task == current);
+ if (is_running)
+ set_current_state(TASK_RUNNING);
+}
- switch (type) {
- case KCSAN_REPORT_CONSUMED_WATCHPOINT:
- if (other_info.ptr != NULL)
- break; /* still in use, retry */
+/* Populate @other_info; requires that the provided @other_info is not in use. */
+static void prepare_report_producer(unsigned long *flags,
+ const struct access_info *ai,
+ struct other_info *other_info)
+{
+ raw_spin_lock_irqsave(&report_lock, *flags);
- other_info.ptr = ptr;
- other_info.size = size;
- other_info.access_type = access_type;
- other_info.task_pid = in_task() ? task_pid_nr(current) : -1;
- other_info.cpu_id = cpu_id;
- other_info.num_stack_entries = stack_trace_save(other_info.stack_entries, NUM_STACK_ENTRIES, 1);
+ /*
+ * The same @other_infos entry cannot be used concurrently, because
+ * there is a one-to-one mapping to watchpoint slots (@watchpoints in
+ * core.c), and a watchpoint is only released for reuse after reporting
+ * is done by the consumer of @other_info. Therefore, it is impossible
+ * for another concurrent prepare_report_producer() to set the same
+ * @other_info, and we are guaranteed exclusivity for the @other_infos
+ * entry pointed to by @other_info.
+ *
+ * To check this property holds, size should never be non-zero here,
+ * because every consumer of struct other_info resets size to 0 in
+ * release_report().
+ */
+ WARN_ON(other_info->ai.size);
- spin_unlock_irqrestore(&report_lock, *flags);
+ other_info->ai = *ai;
+ other_info->num_stack_entries = stack_trace_save(other_info->stack_entries, NUM_STACK_ENTRIES, 2);
- /*
- * The other thread will print the summary; other_info may now
- * be consumed.
- */
- return false;
+ if (IS_ENABLED(CONFIG_KCSAN_VERBOSE))
+ set_other_info_task_blocking(flags, ai, other_info);
- case KCSAN_REPORT_RACE_SIGNAL:
- if (other_info.ptr == NULL)
- break; /* no data available yet, retry */
+ raw_spin_unlock_irqrestore(&report_lock, *flags);
+}
- /*
- * First check if this is the other_info we are expecting, i.e.
- * matches based on how watchpoint was encoded.
- */
- if (!matching_access((unsigned long)other_info.ptr &
- WATCHPOINT_ADDR_MASK,
- other_info.size,
- (unsigned long)ptr & WATCHPOINT_ADDR_MASK,
- size))
- break; /* mismatching watchpoint, retry */
-
- if (!matching_access((unsigned long)other_info.ptr,
- other_info.size, (unsigned long)ptr,
- size)) {
- /*
- * If the actual accesses to not match, this was a false
- * positive due to watchpoint encoding.
- */
- kcsan_counter_inc(
- KCSAN_COUNTER_ENCODING_FALSE_POSITIVES);
+/* Awaits producer to fill @other_info and then returns. */
+static bool prepare_report_consumer(unsigned long *flags,
+ const struct access_info *ai,
+ struct other_info *other_info)
+{
- /* discard this other_info */
- release_report(flags, KCSAN_REPORT_RACE_SIGNAL);
- return false;
- }
+ raw_spin_lock_irqsave(&report_lock, *flags);
+ while (!other_info->ai.size) { /* Await valid @other_info. */
+ raw_spin_unlock_irqrestore(&report_lock, *flags);
+ cpu_relax();
+ raw_spin_lock_irqsave(&report_lock, *flags);
+ }
- access_type |= other_info.access_type;
- if ((access_type & KCSAN_ACCESS_WRITE) == 0) {
- /*
- * While the address matches, this is not the other_info
- * from the thread that consumed our watchpoint, since
- * neither this nor the access in other_info is a write.
- * It is invalid to continue with the report, since we
- * only have information about reads.
- *
- * This can happen due to concurrent races on the same
- * address, with at least 4 threads. To avoid locking up
- * other_info and all other threads, we have to consume
- * it regardless.
- *
- * A concrete case to illustrate why we might lock up if
- * we do not consume other_info:
- *
- * We have 4 threads, all accessing the same address
- * (or matching address ranges). Assume the following
- * watcher and watchpoint consumer pairs:
- * write1-read1, read2-write2. The first to populate
- * other_info is write2, however, write1 consumes it,
- * resulting in a report of write1-write2. This report
- * is valid, however, now read1 populates other_info;
- * read2-read1 is an invalid conflict, yet, no other
- * conflicting access is left. Therefore, we must
- * consume read1's other_info.
- *
- * Since this case is assumed to be rare, it is
- * reasonable to omit this report: one of the other
- * reports includes information about the same shared
- * data, and at this point the likelihood that we
- * re-report the same race again is high.
- */
- release_report(flags, KCSAN_REPORT_RACE_SIGNAL);
- return false;
- }
+ /* Should always have a matching access based on watchpoint encoding. */
+ if (WARN_ON(!matching_access((unsigned long)other_info->ai.ptr & WATCHPOINT_ADDR_MASK, other_info->ai.size,
+ (unsigned long)ai->ptr & WATCHPOINT_ADDR_MASK, ai->size)))
+ goto discard;
+ if (!matching_access((unsigned long)other_info->ai.ptr, other_info->ai.size,
+ (unsigned long)ai->ptr, ai->size)) {
/*
- * Matching & usable access in other_info: keep other_info_lock
- * locked, as this thread consumes it to print the full report;
- * unlocked in release_report.
+ * If the actual accesses do not match, this was a false
+ * positive due to watchpoint encoding.
*/
- return true;
-
- default:
- BUG();
+ kcsan_counter_inc(KCSAN_COUNTER_ENCODING_FALSE_POSITIVES);
+ goto discard;
}
- spin_unlock_irqrestore(&report_lock, *flags);
+ return true;
- goto retry;
+discard:
+ release_report(flags, other_info);
+ return false;
+}
+
+/*
+ * Depending on the report type either sets @other_info and returns false, or
+ * awaits @other_info and returns true. If @other_info is not required for the
+ * report type, simply acquires @report_lock and returns true.
+ */
+static noinline bool prepare_report(unsigned long *flags,
+ enum kcsan_report_type type,
+ const struct access_info *ai,
+ struct other_info *other_info)
+{
+ switch (type) {
+ case KCSAN_REPORT_CONSUMED_WATCHPOINT:
+ prepare_report_producer(flags, ai, other_info);
+ return false;
+ case KCSAN_REPORT_RACE_SIGNAL:
+ return prepare_report_consumer(flags, ai, other_info);
+ default:
+ /* @other_info not required; just acquire @report_lock. */
+ raw_spin_lock_irqsave(&report_lock, *flags);
+ return true;
+ }
}
void kcsan_report(const volatile void *ptr, size_t size, int access_type,
- enum kcsan_value_change value_change, int cpu_id,
- enum kcsan_report_type type)
+ enum kcsan_value_change value_change,
+ enum kcsan_report_type type, int watchpoint_idx)
{
unsigned long flags = 0;
+ const struct access_info ai = {
+ .ptr = ptr,
+ .size = size,
+ .access_type = access_type,
+ .task_pid = in_task() ? task_pid_nr(current) : -1,
+ .cpu_id = raw_smp_processor_id()
+ };
+ struct other_info *other_info = type == KCSAN_REPORT_RACE_UNKNOWN_ORIGIN
+ ? NULL : &other_infos[watchpoint_idx];
+
+ kcsan_disable_current();
+ if (WARN_ON(watchpoint_idx < 0 || watchpoint_idx >= ARRAY_SIZE(other_infos)))
+ goto out;
/*
* With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if
@@ -494,22 +613,22 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type,
*/
lockdep_off();
- kcsan_disable_current();
- if (prepare_report(&flags, ptr, size, access_type, cpu_id, type)) {
+ if (prepare_report(&flags, type, &ai, other_info)) {
/*
* Never report if value_change is FALSE, only if we it is
* either TRUE or MAYBE. In case of MAYBE, further filtering may
* be done once we know the full stack trace in print_report().
*/
bool reported = value_change != KCSAN_VALUE_CHANGE_FALSE &&
- print_report(ptr, size, access_type, value_change, cpu_id, type);
+ print_report(value_change, type, &ai, other_info);
if (reported && panic_on_warn)
panic("panic_on_warn set ...\n");
- release_report(&flags, type);
+ release_report(&flags, other_info);
}
- kcsan_enable_current();
lockdep_on();
+out:
+ kcsan_enable_current();
}