diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/asm-generic/atomic.h | 2 | ||||
-rw-r--r-- | include/asm-generic/qspinlock.h | 1 | ||||
-rw-r--r-- | include/asm-generic/qspinlock_types.h | 8 | ||||
-rw-r--r-- | include/linux/hardirq.h | 28 | ||||
-rw-r--r-- | include/linux/irqflags.h | 36 | ||||
-rw-r--r-- | include/linux/lockdep.h | 230 | ||||
-rw-r--r-- | include/linux/lockdep_types.h | 194 | ||||
-rw-r--r-- | include/linux/rculist.h | 2 | ||||
-rw-r--r-- | include/linux/rwsem.h | 20 | ||||
-rw-r--r-- | include/linux/sched.h | 17 | ||||
-rw-r--r-- | include/linux/seqlock.h | 756 | ||||
-rw-r--r-- | include/linux/spinlock.h | 1 | ||||
-rw-r--r-- | include/linux/spinlock_types.h | 2 | ||||
-rw-r--r-- | include/linux/types.h | 2 |
14 files changed, 823 insertions, 476 deletions
diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index 286867f593d2..11f96f40f4a7 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -159,8 +159,6 @@ ATOMIC_OP(xor, ^) * resource counting etc.. */ -#define ATOMIC_INIT(i) { (i) } - /** * atomic_read - read atomic variable * @v: pointer of type atomic_t diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h index fde943d180e0..2b26cd729b94 100644 --- a/include/asm-generic/qspinlock.h +++ b/include/asm-generic/qspinlock.h @@ -11,6 +11,7 @@ #define __ASM_GENERIC_QSPINLOCK_H #include <asm-generic/qspinlock_types.h> +#include <linux/atomic.h> /** * queued_spin_is_locked - is the spinlock locked? diff --git a/include/asm-generic/qspinlock_types.h b/include/asm-generic/qspinlock_types.h index 56d1309d32f8..2fd1fb89ec36 100644 --- a/include/asm-generic/qspinlock_types.h +++ b/include/asm-generic/qspinlock_types.h @@ -9,15 +9,7 @@ #ifndef __ASM_GENERIC_QSPINLOCK_TYPES_H #define __ASM_GENERIC_QSPINLOCK_TYPES_H -/* - * Including atomic.h with PARAVIRT on will cause compilation errors because - * of recursive header file incluson via paravirt_types.h. So don't include - * it if PARAVIRT is on. - */ -#ifndef CONFIG_PARAVIRT #include <linux/types.h> -#include <linux/atomic.h> -#endif typedef struct qspinlock { union { diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 03c9fece7d43..754f67ac4326 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -111,32 +111,42 @@ extern void rcu_nmi_exit(void); /* * nmi_enter() can nest up to 15 times; see NMI_BITS. */ -#define nmi_enter() \ +#define __nmi_enter() \ do { \ + lockdep_off(); \ arch_nmi_enter(); \ printk_nmi_enter(); \ - lockdep_off(); \ BUG_ON(in_nmi() == NMI_MASK); \ __preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \ - rcu_nmi_enter(); \ + } while (0) + +#define nmi_enter() \ + do { \ + __nmi_enter(); \ lockdep_hardirq_enter(); \ + rcu_nmi_enter(); \ instrumentation_begin(); \ ftrace_nmi_enter(); \ instrumentation_end(); \ } while (0) +#define __nmi_exit() \ + do { \ + BUG_ON(!in_nmi()); \ + __preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \ + printk_nmi_exit(); \ + arch_nmi_exit(); \ + lockdep_on(); \ + } while (0) + #define nmi_exit() \ do { \ instrumentation_begin(); \ ftrace_nmi_exit(); \ instrumentation_end(); \ - lockdep_hardirq_exit(); \ rcu_nmi_exit(); \ - BUG_ON(!in_nmi()); \ - __preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \ - lockdep_on(); \ - printk_nmi_exit(); \ - arch_nmi_exit(); \ + lockdep_hardirq_exit(); \ + __nmi_exit(); \ } while (0) #endif /* LINUX_HARDIRQ_H */ diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 6384d2813ded..bd5c55755447 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -14,6 +14,7 @@ #include <linux/typecheck.h> #include <asm/irqflags.h> +#include <asm/percpu.h> /* Currently lockdep_softirqs_on/off is used only by lockdep */ #ifdef CONFIG_PROVE_LOCKING @@ -31,18 +32,35 @@ #endif #ifdef CONFIG_TRACE_IRQFLAGS + +/* Per-task IRQ trace events information. */ +struct irqtrace_events { + unsigned int irq_events; + unsigned long hardirq_enable_ip; + unsigned long hardirq_disable_ip; + unsigned int hardirq_enable_event; + unsigned int hardirq_disable_event; + unsigned long softirq_disable_ip; + unsigned long softirq_enable_ip; + unsigned int softirq_disable_event; + unsigned int softirq_enable_event; +}; + +DECLARE_PER_CPU(int, hardirqs_enabled); +DECLARE_PER_CPU(int, hardirq_context); + extern void trace_hardirqs_on_prepare(void); extern void trace_hardirqs_off_finish(void); extern void trace_hardirqs_on(void); extern void trace_hardirqs_off(void); -# define lockdep_hardirq_context(p) ((p)->hardirq_context) +# define lockdep_hardirq_context() (this_cpu_read(hardirq_context)) # define lockdep_softirq_context(p) ((p)->softirq_context) -# define lockdep_hardirqs_enabled(p) ((p)->hardirqs_enabled) +# define lockdep_hardirqs_enabled() (this_cpu_read(hardirqs_enabled)) # define lockdep_softirqs_enabled(p) ((p)->softirqs_enabled) -# define lockdep_hardirq_enter() \ -do { \ - if (!current->hardirq_context++) \ - current->hardirq_threaded = 0; \ +# define lockdep_hardirq_enter() \ +do { \ + if (this_cpu_inc_return(hardirq_context) == 1) \ + current->hardirq_threaded = 0; \ } while (0) # define lockdep_hardirq_threaded() \ do { \ @@ -50,7 +68,7 @@ do { \ } while (0) # define lockdep_hardirq_exit() \ do { \ - current->hardirq_context--; \ + this_cpu_dec(hardirq_context); \ } while (0) # define lockdep_softirq_enter() \ do { \ @@ -104,9 +122,9 @@ do { \ # define trace_hardirqs_off_finish() do { } while (0) # define trace_hardirqs_on() do { } while (0) # define trace_hardirqs_off() do { } while (0) -# define lockdep_hardirq_context(p) 0 +# define lockdep_hardirq_context() 0 # define lockdep_softirq_context(p) 0 -# define lockdep_hardirqs_enabled(p) 0 +# define lockdep_hardirqs_enabled() 0 # define lockdep_softirqs_enabled(p) 0 # define lockdep_hardirq_enter() do { } while (0) # define lockdep_hardirq_threaded() do { } while (0) diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 8fce5c98a4b0..39a35699d0d6 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -10,33 +10,15 @@ #ifndef __LINUX_LOCKDEP_H #define __LINUX_LOCKDEP_H +#include <linux/lockdep_types.h> +#include <asm/percpu.h> + struct task_struct; -struct lockdep_map; /* for sysctl */ extern int prove_locking; extern int lock_stat; -#define MAX_LOCKDEP_SUBCLASSES 8UL - -#include <linux/types.h> - -enum lockdep_wait_type { - LD_WAIT_INV = 0, /* not checked, catch all */ - - LD_WAIT_FREE, /* wait free, rcu etc.. */ - LD_WAIT_SPIN, /* spin loops, raw_spinlock_t etc.. */ - -#ifdef CONFIG_PROVE_RAW_LOCK_NESTING - LD_WAIT_CONFIG, /* CONFIG_PREEMPT_LOCK, spinlock_t etc.. */ -#else - LD_WAIT_CONFIG = LD_WAIT_SPIN, -#endif - LD_WAIT_SLEEP, /* sleeping locks, mutex_t etc.. */ - - LD_WAIT_MAX, /* must be last */ -}; - #ifdef CONFIG_LOCKDEP #include <linux/linkage.h> @@ -44,147 +26,6 @@ enum lockdep_wait_type { #include <linux/debug_locks.h> #include <linux/stacktrace.h> -/* - * We'd rather not expose kernel/lockdep_states.h this wide, but we do need - * the total number of states... :-( - */ -#define XXX_LOCK_USAGE_STATES (1+2*4) - -/* - * NR_LOCKDEP_CACHING_CLASSES ... Number of classes - * cached in the instance of lockdep_map - * - * Currently main class (subclass == 0) and signle depth subclass - * are cached in lockdep_map. This optimization is mainly targeting - * on rq->lock. double_rq_lock() acquires this highly competitive with - * single depth. - */ -#define NR_LOCKDEP_CACHING_CLASSES 2 - -/* - * A lockdep key is associated with each lock object. For static locks we use - * the lock address itself as the key. Dynamically allocated lock objects can - * have a statically or dynamically allocated key. Dynamically allocated lock - * keys must be registered before being used and must be unregistered before - * the key memory is freed. - */ -struct lockdep_subclass_key { - char __one_byte; -} __attribute__ ((__packed__)); - -/* hash_entry is used to keep track of dynamically allocated keys. */ -struct lock_class_key { - union { - struct hlist_node hash_entry; - struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES]; - }; -}; - -extern struct lock_class_key __lockdep_no_validate__; - -struct lock_trace; - -#define LOCKSTAT_POINTS 4 - -/* - * The lock-class itself. The order of the structure members matters. - * reinit_class() zeroes the key member and all subsequent members. - */ -struct lock_class { - /* - * class-hash: - */ - struct hlist_node hash_entry; - - /* - * Entry in all_lock_classes when in use. Entry in free_lock_classes - * when not in use. Instances that are being freed are on one of the - * zapped_classes lists. - */ - struct list_head lock_entry; - - /* - * These fields represent a directed graph of lock dependencies, - * to every node we attach a list of "forward" and a list of - * "backward" graph nodes. - */ - struct list_head locks_after, locks_before; - - const struct lockdep_subclass_key *key; - unsigned int subclass; - unsigned int dep_gen_id; - - /* - * IRQ/softirq usage tracking bits: - */ - unsigned long usage_mask; - const struct lock_trace *usage_traces[XXX_LOCK_USAGE_STATES]; - - /* - * Generation counter, when doing certain classes of graph walking, - * to ensure that we check one node only once: - */ - int name_version; - const char *name; - - short wait_type_inner; - short wait_type_outer; - -#ifdef CONFIG_LOCK_STAT - unsigned long contention_point[LOCKSTAT_POINTS]; - unsigned long contending_point[LOCKSTAT_POINTS]; -#endif -} __no_randomize_layout; - -#ifdef CONFIG_LOCK_STAT -struct lock_time { - s64 min; - s64 max; - s64 total; - unsigned long nr; -}; - -enum bounce_type { - bounce_acquired_write, - bounce_acquired_read, - bounce_contended_write, - bounce_contended_read, - nr_bounce_types, - - bounce_acquired = bounce_acquired_write, - bounce_contended = bounce_contended_write, -}; - -struct lock_class_stats { - unsigned long contention_point[LOCKSTAT_POINTS]; - unsigned long contending_point[LOCKSTAT_POINTS]; - struct lock_time read_waittime; - struct lock_time write_waittime; - struct lock_time read_holdtime; - struct lock_time write_holdtime; - unsigned long bounces[nr_bounce_types]; -}; - -struct lock_class_stats lock_stats(struct lock_class *class); -void clear_lock_stats(struct lock_class *class); -#endif - -/* - * Map the lock object (the lock instance) to the lock-class object. - * This is embedded into specific lock instances: - */ -struct lockdep_map { - struct lock_class_key *key; - struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES]; - const char *name; - short wait_type_outer; /* can be taken in this context */ - short wait_type_inner; /* presents this context */ -#ifdef CONFIG_LOCK_STAT - int cpu; - unsigned long ip; -#endif -}; - static inline void lockdep_copy_map(struct lockdep_map *to, struct lockdep_map *from) { @@ -440,8 +281,6 @@ static inline void lock_set_subclass(struct lockdep_map *lock, extern void lock_downgrade(struct lockdep_map *lock, unsigned long ip); -struct pin_cookie { unsigned int val; }; - #define NIL_COOKIE (struct pin_cookie){ .val = 0U, } extern struct pin_cookie lock_pin_lock(struct lockdep_map *lock); @@ -520,10 +359,6 @@ static inline void lockdep_set_selftest_task(struct task_struct *task) # define lockdep_reset() do { debug_locks = 1; } while (0) # define lockdep_free_key_range(start, size) do { } while (0) # define lockdep_sys_exit() do { } while (0) -/* - * The class key takes no space if lockdep is disabled: - */ -struct lock_class_key { }; static inline void lockdep_register_key(struct lock_class_key *key) { @@ -533,11 +368,6 @@ static inline void lockdep_unregister_key(struct lock_class_key *key) { } -/* - * The lockdep_map takes no space if lockdep is disabled: - */ -struct lockdep_map { }; - #define lockdep_depth(tsk) (0) #define lockdep_is_held_type(l, r) (1) @@ -549,8 +379,6 @@ struct lockdep_map { }; #define lockdep_recursing(tsk) (0) -struct pin_cookie { }; - #define NIL_COOKIE (struct pin_cookie){ } #define lockdep_pin_lock(l) ({ struct pin_cookie cookie = { }; cookie; }) @@ -703,38 +531,58 @@ do { \ lock_release(&(lock)->dep_map, _THIS_IP_); \ } while (0) -#define lockdep_assert_irqs_enabled() do { \ - WARN_ONCE(debug_locks && !current->lockdep_recursion && \ - !current->hardirqs_enabled, \ - "IRQs not enabled as expected\n"); \ - } while (0) +DECLARE_PER_CPU(int, hardirqs_enabled); +DECLARE_PER_CPU(int, hardirq_context); -#define lockdep_assert_irqs_disabled() do { \ - WARN_ONCE(debug_locks && !current->lockdep_recursion && \ - current->hardirqs_enabled, \ - "IRQs not disabled as expected\n"); \ - } while (0) +#define lockdep_assert_irqs_enabled() \ +do { \ + WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirqs_enabled)); \ +} while (0) -#define lockdep_assert_in_irq() do { \ - WARN_ONCE(debug_locks && !current->lockdep_recursion && \ - !current->hardirq_context, \ - "Not in hardirq as expected\n"); \ - } while (0) +#define lockdep_assert_irqs_disabled() \ +do { \ + WARN_ON_ONCE(debug_locks && this_cpu_read(hardirqs_enabled)); \ +} while (0) + +#define lockdep_assert_in_irq() \ +do { \ + WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirq_context)); \ +} while (0) + +#define lockdep_assert_preemption_enabled() \ +do { \ + WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT) && \ + debug_locks && \ + (preempt_count() != 0 || \ + !this_cpu_read(hardirqs_enabled))); \ +} while (0) + +#define lockdep_assert_preemption_disabled() \ +do { \ + WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT) && \ + debug_locks && \ + (preempt_count() == 0 && \ + this_cpu_read(hardirqs_enabled))); \ +} while (0) #else # define might_lock(lock) do { } while (0) # define might_lock_read(lock) do { } while (0) # define might_lock_nested(lock, subclass) do { } while (0) + # define lockdep_assert_irqs_enabled() do { } while (0) # define lockdep_assert_irqs_disabled() do { } while (0) # define lockdep_assert_in_irq() do { } while (0) + +# define lockdep_assert_preemption_enabled() do { } while (0) +# define lockdep_assert_preemption_disabled() do { } while (0) #endif #ifdef CONFIG_PROVE_RAW_LOCK_NESTING # define lockdep_assert_RT_in_threaded_ctx() do { \ WARN_ONCE(debug_locks && !current->lockdep_recursion && \ - current->hardirq_context && \ + lockdep_hardirq_context() && \ !(current->hardirq_threaded || current->irq_config), \ "Not in threaded context on PREEMPT_RT as expected\n"); \ } while (0) diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h new file mode 100644 index 000000000000..bb35b449f533 --- /dev/null +++ b/include/linux/lockdep_types.h @@ -0,0 +1,194 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Runtime locking correctness validator + * + * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra + * + * see Documentation/locking/lockdep-design.rst for more details. + */ +#ifndef __LINUX_LOCKDEP_TYPES_H +#define __LINUX_LOCKDEP_TYPES_H + +#include <linux/types.h> + +#define MAX_LOCKDEP_SUBCLASSES 8UL + +enum lockdep_wait_type { + LD_WAIT_INV = 0, /* not checked, catch all */ + + LD_WAIT_FREE, /* wait free, rcu etc.. */ + LD_WAIT_SPIN, /* spin loops, raw_spinlock_t etc.. */ + +#ifdef CONFIG_PROVE_RAW_LOCK_NESTING + LD_WAIT_CONFIG, /* CONFIG_PREEMPT_LOCK, spinlock_t etc.. */ +#else + LD_WAIT_CONFIG = LD_WAIT_SPIN, +#endif + LD_WAIT_SLEEP, /* sleeping locks, mutex_t etc.. */ + + LD_WAIT_MAX, /* must be last */ +}; + +#ifdef CONFIG_LOCKDEP + +/* + * We'd rather not expose kernel/lockdep_states.h this wide, but we do need + * the total number of states... :-( + */ +#define XXX_LOCK_USAGE_STATES (1+2*4) + +/* + * NR_LOCKDEP_CACHING_CLASSES ... Number of classes + * cached in the instance of lockdep_map + * + * Currently main class (subclass == 0) and signle depth subclass + * are cached in lockdep_map. This optimization is mainly targeting + * on rq->lock. double_rq_lock() acquires this highly competitive with + * single depth. + */ +#define NR_LOCKDEP_CACHING_CLASSES 2 + +/* + * A lockdep key is associated with each lock object. For static locks we use + * the lock address itself as the key. Dynamically allocated lock objects can + * have a statically or dynamically allocated key. Dynamically allocated lock + * keys must be registered before being used and must be unregistered before + * the key memory is freed. + */ +struct lockdep_subclass_key { + char __one_byte; +} __attribute__ ((__packed__)); + +/* hash_entry is used to keep track of dynamically allocated keys. */ +struct lock_class_key { + union { + struct hlist_node hash_entry; + struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES]; + }; +}; + +extern struct lock_class_key __lockdep_no_validate__; + +struct lock_trace; + +#define LOCKSTAT_POINTS 4 + +/* + * The lock-class itself. The order of the structure members matters. + * reinit_class() zeroes the key member and all subsequent members. + */ +struct lock_class { + /* + * class-hash: + */ + struct hlist_node hash_entry; + + /* + * Entry in all_lock_classes when in use. Entry in free_lock_classes + * when not in use. Instances that are being freed are on one of the + * zapped_classes lists. + */ + struct list_head lock_entry; + + /* + * These fields represent a directed graph of lock dependencies, + * to every node we attach a list of "forward" and a list of + * "backward" graph nodes. + */ + struct list_head locks_after, locks_before; + + const struct lockdep_subclass_key *key; + unsigned int subclass; + unsigned int dep_gen_id; + + /* + * IRQ/softirq usage tracking bits: + */ + unsigned long usage_mask; + const struct lock_trace *usage_traces[XXX_LOCK_USAGE_STATES]; + + /* + * Generation counter, when doing certain classes of graph walking, + * to ensure that we check one node only once: + */ + int name_version; + const char *name; + + short wait_type_inner; + short wait_type_outer; + +#ifdef CONFIG_LOCK_STAT + unsigned long contention_point[LOCKSTAT_POINTS]; + unsigned long contending_point[LOCKSTAT_POINTS]; +#endif +} __no_randomize_layout; + +#ifdef CONFIG_LOCK_STAT +struct lock_time { + s64 min; + s64 max; + s64 total; + unsigned long nr; +}; + +enum bounce_type { + bounce_acquired_write, + bounce_acquired_read, + bounce_contended_write, + bounce_contended_read, + nr_bounce_types, + + bounce_acquired = bounce_acquired_write, + bounce_contended = bounce_contended_write, +}; + +struct lock_class_stats { + unsigned long contention_point[LOCKSTAT_POINTS]; + unsigned long contending_point[LOCKSTAT_POINTS]; + struct lock_time read_waittime; + struct lock_time write_waittime; + struct lock_time read_holdtime; + struct lock_time write_holdtime; + unsigned long bounces[nr_bounce_types]; +}; + +struct lock_class_stats lock_stats(struct lock_class *class); +void clear_lock_stats(struct lock_class *class); +#endif + +/* + * Map the lock object (the lock instance) to the lock-class object. + * This is embedded into specific lock instances: + */ +struct lockdep_map { + struct lock_class_key *key; + struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES]; + const char *name; + short wait_type_outer; /* can be taken in this context */ + short wait_type_inner; /* presents this context */ +#ifdef CONFIG_LOCK_STAT + int cpu; + unsigned long ip; +#endif +}; + +struct pin_cookie { unsigned int val; }; + +#else /* !CONFIG_LOCKDEP */ + +/* + * The class key takes no space if lockdep is disabled: + */ +struct lock_class_key { }; + +/* + * The lockdep_map takes no space if lockdep is disabled: + */ +struct lockdep_map { }; + +struct pin_cookie { }; + +#endif /* !LOCKDEP */ + +#endif /* __LINUX_LOCKDEP_TYPES_H */ diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 7eed65b5f713..7a6fc9956510 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -248,6 +248,8 @@ static inline void __list_splice_init_rcu(struct list_head *list, */ sync(); + ASSERT_EXCLUSIVE_ACCESS(*first); + ASSERT_EXCLUSIVE_ACCESS(*last); /* * Readers are finished with the source list, so perform splice. diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 7e5b2a4eb560..25e3fde85617 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -60,39 +60,39 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem) } #define RWSEM_UNLOCKED_VALUE 0L -#define __RWSEM_INIT_COUNT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE) +#define __RWSEM_COUNT_INIT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE) /* Common initializer macros and functions */ #ifdef CONFIG_DEBUG_LOCK_ALLOC # define __RWSEM_DEP_MAP_INIT(lockname) \ - , .dep_map = { \ + .dep_map = { \ .name = #lockname, \ .wait_type_inner = LD_WAIT_SLEEP, \ - } + }, #else # define __RWSEM_DEP_MAP_INIT(lockname) #endif #ifdef CONFIG_DEBUG_RWSEMS -# define __DEBUG_RWSEM_INITIALIZER(lockname) , .magic = &lockname +# define __RWSEM_DEBUG_INIT(lockname) .magic = &lockname, #else -# define __DEBUG_RWSEM_INITIALIZER(lockname) +# define __RWSEM_DEBUG_INIT(lockname) #endif #ifdef CONFIG_RWSEM_SPIN_ON_OWNER -#define __RWSEM_OPT_INIT(lockname) , .osq = OSQ_LOCK_UNLOCKED +#define __RWSEM_OPT_INIT(lockname) .osq = OSQ_LOCK_UNLOCKED, #else #define __RWSEM_OPT_INIT(lockname) #endif #define __RWSEM_INITIALIZER(name) \ - { __RWSEM_INIT_COUNT(name), \ + { __RWSEM_COUNT_INIT(name), \ .owner = ATOMIC_LONG_INIT(0), \ - .wait_list = LIST_HEAD_INIT((name).wait_list), \ - .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock) \ __RWSEM_OPT_INIT(name) \ - __DEBUG_RWSEM_INITIALIZER(name) \ + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock),\ + .wait_list = LIST_HEAD_INIT((name).wait_list), \ + __RWSEM_DEBUG_INIT(name) \ __RWSEM_DEP_MAP_INIT(name) } #define DECLARE_RWSEM(name) \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 683372943093..060e9214c8b5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -18,6 +18,7 @@ #include <linux/mutex.h> #include <linux/plist.h> #include <linux/hrtimer.h> +#include <linux/irqflags.h> #include <linux/seccomp.h> #include <linux/nodemask.h> #include <linux/rcupdate.h> @@ -980,19 +981,9 @@ struct task_struct { #endif #ifdef CONFIG_TRACE_IRQFLAGS - unsigned int irq_events; + struct irqtrace_events irqtrace; unsigned int hardirq_threaded; - unsigned long hardirq_enable_ip; - unsigned long hardirq_disable_ip; - unsigned int hardirq_enable_event; - unsigned int hardirq_disable_event; - int hardirqs_enabled; - int hardirq_context; u64 hardirq_chain_key; - unsigned long softirq_disable_ip; - unsigned long softirq_enable_ip; - unsigned int softirq_disable_event; - unsigned int softirq_enable_event; int softirqs_enabled; int softirq_context; int irq_config; @@ -1193,8 +1184,12 @@ struct task_struct { #ifdef CONFIG_KASAN unsigned int kasan_depth; #endif + #ifdef CONFIG_KCSAN struct kcsan_ctx kcsan_ctx; +#ifdef CONFIG_TRACE_IRQFLAGS + struct irqtrace_events kcsan_save_irqtrace; +#endif #endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 8b97204f35a7..54bc20496392 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -1,36 +1,15 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_SEQLOCK_H #define __LINUX_SEQLOCK_H + /* - * Reader/writer consistent mechanism without starving writers. This type of - * lock for data where the reader wants a consistent set of information - * and is willing to retry if the information changes. There are two types - * of readers: - * 1. Sequence readers which never block a writer but they may have to retry - * if a writer is in progress by detecting change in sequence number. - * Writers do not wait for a sequence reader. - * 2. Locking readers which will wait if a writer or another locking reader - * is in progress. A locking reader in progress will also block a writer - * from going forward. Unlike the regular rwlock, the read lock here is - * exclusive so that only one locking reader can get it. - * - * This is not as cache friendly as brlock. Also, this may not work well - * for data that contains pointers, because any writer could - * invalidate a pointer that a reader was following. - * - * Expected non-blocking reader usage: - * do { - * seq = read_seqbegin(&foo); - * ... - * } while (read_seqretry(&foo, seq)); - * + * seqcount_t / seqlock_t - a reader-writer consistency mechanism with + * lockless readers (read-only retry loops), and no writer starvation. * - * On non-SMP the spin locks disappear but the writer still needs - * to increment the sequence variables because an interrupt routine could - * change the state of the data. + * See Documentation/locking/seqlock.rst * - * Based on x86_64 vsyscall gettimeofday - * by Keith Owens and Andrea Arcangeli + * Copyrights: + * - Based on x86_64 vsyscall gettimeofday: Keith Owens, Andrea Arcangeli */ #include <linux/spinlock.h> @@ -41,8 +20,8 @@ #include <asm/processor.h> /* - * The seqlock interface does not prescribe a precise sequence of read - * begin/retry/end. For readers, typically there is a call to + * The seqlock seqcount_t interface does not prescribe a precise sequence of + * read begin/retry/end. For readers, typically there is a call to * read_seqcount_begin() and read_seqcount_retry(), however, there are more * esoteric cases which do not follow this pattern. * @@ -50,16 +29,30 @@ * via seqcount_t under KCSAN: upon beginning a seq-reader critical section, * pessimistically mark the next KCSAN_SEQLOCK_REGION_MAX memory accesses as * atomics; if there is a matching read_seqcount_retry() call, no following - * memory operations are considered atomic. Usage of seqlocks via seqlock_t - * interface is not affected. + * memory operations are considered atomic. Usage of the seqlock_t interface + * is not affected. */ #define KCSAN_SEQLOCK_REGION_MAX 1000 /* - * Version using sequence counter only. - * This can be used when code has its own mutex protecting the - * updating starting before the write_seqcountbeqin() and ending - * after the write_seqcount_end(). + * Sequence counters (seqcount_t) + * + * This is the raw counting mechanism, without any writer protection. + * + * Write side critical sections must be serialized and non-preemptible. + * + * If readers can be invoked from hardirq or softirq contexts, + * interrupts or bottom halves must also be respectively disabled before + * entering the write section. + * + * This mechanism can't be used if the protected data contains pointers, + * as the writer can invalidate a pointer that a reader is following. + * + * If it's desired to automatically handle the sequence counter writer + * serialization and non-preemptibility requirements, use a sequential + * lock (seqlock_t) instead. + * + * See Documentation/locking/seqlock.rst */ typedef struct seqcount { unsigned sequence; @@ -82,6 +75,10 @@ static inline void __seqcount_init(seqcount_t *s, const char *name, # define SEQCOUNT_DEP_MAP_INIT(lockname) \ .dep_map = { .name = #lockname } \ +/** + * seqcount_init() - runtime initializer for seqcount_t + * @s: Pointer to the seqcount_t instance + */ # define seqcount_init(s) \ do { \ static struct lock_class_key __key; \ @@ -105,13 +102,15 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) # define seqcount_lockdep_reader_access(x) #endif -#define SEQCNT_ZERO(lockname) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(lockname)} - +/** + * SEQCNT_ZERO() - static initializer for seqcount_t + * @name: Name of the seqcount_t instance + */ +#define SEQCNT_ZERO(name) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(name) } /** - * __read_seqcount_begin - begin a seq-read critical section (without barrier) - * @s: pointer to seqcount_t - * Returns: count to be passed to read_seqcount_retry + * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier + * @s: Pointer to seqcount_t * * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb() * barrier. Callers should ensure that smp_rmb() or equivalent ordering is @@ -120,6 +119,8 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) * * Use carefully, only in critical code, and comment how the barrier is * provided. + * + * Return: count to be passed to read_seqcount_retry() */ static inline unsigned __read_seqcount_begin(const seqcount_t *s) { @@ -136,30 +137,10 @@ repeat: } /** - * raw_read_seqcount - Read the raw seqcount - * @s: pointer to seqcount_t - * Returns: count to be passed to read_seqcount_retry + * raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep + * @s: Pointer to seqcount_t * - * raw_read_seqcount opens a read critical section of the given - * seqcount without any lockdep checking and without checking or - * masking the LSB. Calling code is responsible for handling that. - */ -static inline unsigned raw_read_seqcount(const seqcount_t *s) -{ - unsigned ret = READ_ONCE(s->sequence); - smp_rmb(); - kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); - return ret; -} - -/** - * raw_read_seqcount_begin - start seq-read critical section w/o lockdep - * @s: pointer to seqcount_t - * Returns: count to be passed to read_seqcount_retry - * - * raw_read_seqcount_begin opens a read critical section of the given - * seqcount, but without any lockdep checking. Validity of the critical - * section is tested by checking read_seqcount_retry function. + * Return: count to be passed to read_seqcount_retry() */ static inline unsigned raw_read_seqcount_begin(const seqcount_t *s) { @@ -169,13 +150,10 @@ static inline unsigned raw_read_seqcount_begin(const seqcount_t *s) } /** - * read_seqcount_begin - begin a seq-read critical section - * @s: pointer to seqcount_t - * Returns: count to be passed to read_seqcount_retry + * read_seqcount_begin() - begin a seqcount_t read critical section + * @s: Pointer to seqcount_t * - * read_seqcount_begin opens a read critical section of the given seqcount. - * Validity of the critical section is tested by checking read_seqcount_retry - * function. + * Return: count to be passed to read_seqcount_retry() */ static inline unsigned read_seqcount_begin(const seqcount_t *s) { @@ -184,32 +162,54 @@ static inline unsigned read_seqcount_begin(const seqcount_t *s) } /** - * raw_seqcount_begin - begin a seq-read critical section - * @s: pointer to seqcount_t - * Returns: count to be passed to read_seqcount_retry + * raw_read_seqcount() - read the raw seqcount_t counter value + * @s: Pointer to seqcount_t * - * raw_seqcount_begin opens a read critical section of the given seqcount. - * Validity of the critical section is tested by checking read_seqcount_retry - * function. + * raw_read_seqcount opens a read critical section of the given + * seqcount_t, without any lockdep checking, and without checking or + * masking the sequence counter LSB. Calling code is responsible for + * handling that. * - * Unlike read_seqcount_begin(), this function will not wait for the count - * to stabilize. If a writer is active when we begin, we will fail the - * read_seqcount_retry() instead of stabilizing at the beginning of the - * critical section. + * Return: count to be passed to read_seqcount_retry() */ -static inline unsigned raw_seqcount_begin(const seqcount_t *s) +static inline unsigned raw_read_seqcount(const seqcount_t *s) { unsigned ret = READ_ONCE(s->sequence); smp_rmb(); kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); - return ret & ~1; + return ret; } /** - * __read_seqcount_retry - end a seq-read critical section (without barrier) - * @s: pointer to seqcount_t - * @start: count, from read_seqcount_begin - * Returns: 1 if retry is required, else 0 + * raw_seqcount_begin() - begin a seqcount_t read critical section w/o + * lockdep and w/o counter stabilization + * @s: Pointer to seqcount_t + * + * raw_seqcount_begin opens a read critical section of the given + * seqcount_t. Unlike read_seqcount_begin(), this function will not wait + * for the count to stabilize. If a writer is active when it begins, it + * will fail the read_seqcount_retry() at the end of the read critical + * section instead of stabilizing at the beginning of it. + * + * Use this only in special kernel hot paths where the read section is + * small and has a high probability of success through other external + * means. It will save a single branching instruction. + * + * Return: count to be passed to read_seqcount_retry() + */ +static inline unsigned raw_seqcount_begin(const seqcount_t *s) +{ + /* + * If the counter is odd, let read_seqcount_retry() fail + * by decrementing the counter. + */ + return raw_read_seqcount(s) & ~1; +} + +/** + * __read_seqcount_retry() - end a seqcount_t read section w/o barrier + * @s: Pointer to seqcount_t + * @start: count, from read_seqcount_begin() * * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb() * barrier. Callers should ensure that smp_rmb() or equivalent ordering is @@ -218,6 +218,8 @@ static inline unsigned raw_seqcount_begin(const seqcount_t *s) * * Use carefully, only in critical code, and comment how the barrier is * provided. + * + * Return: true if a read section retry is required, else false */ static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start) { @@ -226,14 +228,15 @@ static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start) } /** - * read_seqcount_retry - end a seq-read critical section - * @s: pointer to seqcount_t - * @start: count, from read_seqcount_begin - * Returns: 1 if retry is required, else 0 + * read_seqcount_retry() - end a seqcount_t read critical section + * @s: Pointer to seqcount_t + * @start: count, from read_seqcount_begin() * - * read_seqcount_retry closes a read critical section of the given seqcount. - * If the critical section was invalid, it must be ignored (and typically - * retried). + * read_seqcount_retry closes the read critical section of given + * seqcount_t. If the critical section was invalid, it must be ignored + * (and typically retried). + * + * Return: true if a read section retry is required, else false */ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) { @@ -241,8 +244,10 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) return __read_seqcount_retry(s, start); } - - +/** + * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep + * @s: Pointer to seqcount_t + */ static inline void raw_write_seqcount_begin(seqcount_t *s) { kcsan_nestable_atomic_begin(); @@ -250,6 +255,10 @@ static inline void raw_write_seqcount_begin(seqcount_t *s) smp_wmb(); } +/** + * raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep + * @s: Pointer to seqcount_t + */ static inline void raw_write_seqcount_end(seqcount_t *s) { smp_wmb(); @@ -257,45 +266,104 @@ static inline void raw_write_seqcount_end(seqcount_t *s) kcsan_nestable_atomic_end(); } +static inline void __write_seqcount_begin_nested(seqcount_t *s, int subclass) +{ + raw_write_seqcount_begin(s); + seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); +} + /** - * raw_write_seqcount_barrier - do a seq write barrier - * @s: pointer to seqcount_t + * write_seqcount_begin_nested() - start a seqcount_t write section with + * custom lockdep nesting level + * @s: Pointer to seqcount_t + * @subclass: lockdep nesting level * - * This can be used to provide an ordering guarantee instead of the - * usual consistency guarantee. It is one wmb cheaper, because we can - * collapse the two back-to-back wmb()s. + * See Documentation/locking/lockdep-design.rst + */ +static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass) +{ + lockdep_assert_preemption_disabled(); + __write_seqcount_begin_nested(s, subclass); +} + +/* + * A write_seqcount_begin() variant w/o lockdep non-preemptibility checks. + * + * Use for internal seqlock.h code where it's known that preemption is + * already disabled. For example, seqlock_t write side functions. + */ +static inline void __write_seqcount_begin(seqcount_t *s) +{ + __write_seqcount_begin_nested(s, 0); +} + +/** + * write_seqcount_begin() - start a seqcount_t write side critical section + * @s: Pointer to seqcount_t + * + * write_seqcount_begin opens a write side critical section of the given + * seqcount_t. + * + * Context: seqcount_t write side critical sections must be serialized and + * non-preemptible. If readers can be invoked from hardirq or softirq + * context, interrupts or bottom halves must be respectively disabled. + */ +static inline void write_seqcount_begin(seqcount_t *s) +{ + write_seqcount_begin_nested(s, 0); +} + +/** + * write_seqcount_end() - end a seqcount_t write side critical section + * @s: Pointer to seqcount_t + * + * The write section must've been opened with write_seqcount_begin(). + */ +static inline void write_seqcount_end(seqcount_t *s) +{ + seqcount_release(&s->dep_map, _RET_IP_); + raw_write_seqcount_end(s); +} + +/** + * raw_write_seqcount_barrier() - do a seqcount_t write barrier + * @s: Pointer to seqcount_t + * + * This can be used to provide an ordering guarantee instead of the usual + * consistency guarantee. It is one wmb cheaper, because it can collapse + * the two back-to-back wmb()s. * * Note that writes surrounding the barrier should be declared atomic (e.g. * via WRITE_ONCE): a) to ensure the writes become visible to other threads * atomically, avoiding compiler optimizations; b) to document which writes are * meant to propagate to the reader critical section. This is necessary because * neither writes before and after the barrier are enclosed in a seq-writer - * critical section that would ensure readers are aware of ongoing writes. + * critical section that would ensure readers are aware of ongoing writes:: * - * seqcount_t seq; - * bool X = true, Y = false; + * seqcount_t seq; + * bool X = true, Y = false; * - * void read(void) - * { - * bool x, y; + * void read(void) + * { + * bool x, y; * - * do { - * int s = read_seqcount_begin(&seq); + * do { + * int s = read_seqcount_begin(&seq); * - * x = X; y = Y; + * x = X; y = Y; * - * } while (read_seqcount_retry(&seq, s)); + * } while (read_seqcount_retry(&seq, s)); * - * BUG_ON(!x && !y); + * BUG_ON(!x && !y); * } * * void write(void) * { - * WRITE_ONCE(Y, true); + * WRITE_ONCE(Y, true); * - * raw_write_seqcount_barrier(seq); + * raw_write_seqcount_barrier(seq); * - * WRITE_ONCE(X, false); + * WRITE_ONCE(X, false); * } */ static inline void raw_write_seqcount_barrier(seqcount_t *s) @@ -307,6 +375,37 @@ static inline void raw_write_seqcount_barrier(seqcount_t *s) kcsan_nestable_atomic_end(); } +/** + * write_seqcount_invalidate() - invalidate in-progress seqcount_t read + * side operations + * @s: Pointer to seqcount_t + * + * After write_seqcount_invalidate, no seqcount_t read side operations + * will complete successfully and see data older than this. + */ +static inline void write_seqcount_invalidate(seqcount_t *s) +{ + smp_wmb(); + kcsan_nestable_atomic_begin(); + s->sequence+=2; + kcsan_nestable_atomic_end(); +} + +/** + * raw_read_seqcount_latch() - pick even/odd seqcount_t latch data copy + * @s: Pointer to seqcount_t + * + * Use seqcount_t latching to switch between two storage places protected + * by a sequence counter. Doing so allows having interruptible, preemptible, + * seqcount_t write side critical sections. + * + * Check raw_write_seqcount_latch() for more details and a full reader and + * writer usage example. + * + * Return: sequence counter raw value. Use the lowest bit as an index for + * picking which data copy to read. The full counter value must then be + * checked with read_seqcount_retry(). + */ static inline int raw_read_seqcount_latch(seqcount_t *s) { /* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */ @@ -315,8 +414,8 @@ static inline int raw_read_seqcount_latch(seqcount_t *s) } /** - * raw_write_seqcount_latch - redirect readers to even/odd copy - * @s: pointer to seqcount_t + * raw_write_seqcount_latch() - redirect readers to even/odd copy + * @s: Pointer to seqcount_t * * The latch technique is a multiversion concurrency control method that allows * queries during non-atomic modifications. If you can guarantee queries never @@ -332,64 +431,68 @@ static inline int raw_read_seqcount_latch(seqcount_t *s) * Very simply put: we first modify one copy and then the other. This ensures * there is always one copy in a stable state, ready to give us an answer. * - * The basic form is a data structure like: + * The basic form is a data structure like:: * - * struct latch_struct { - * seqcount_t seq; - * struct data_struct data[2]; - * }; + * struct latch_struct { + * seqcount_t seq; + * struct data_struct data[2]; + * }; * * Where a modification, which is assumed to be externally serialized, does the - * following: + * following:: * - * void latch_modify(struct latch_struct *latch, ...) - * { - * smp_wmb(); <- Ensure that the last data[1] update is visible - * latch->seq++; - * smp_wmb(); <- Ensure that the seqcount update is visible + * void latch_modify(struct latch_struct *latch, ...) + * { + * smp_wmb(); // Ensure that the last data[1] update is visible + * latch->seq++; + * smp_wmb(); // Ensure that the seqcount update is visible * - * modify(latch->data[0], ...); + * modify(latch->data[0], ...); * - * smp_wmb(); <- Ensure that the data[0] update is visible - * latch->seq++; - * smp_wmb(); <- Ensure that the seqcount update is visible + * smp_wmb(); // Ensure that the data[0] update is visible + * latch->seq++; + * smp_wmb(); // Ensure that the seqcount update is visible * - * modify(latch->data[1], ...); - * } + * modify(latch->data[1], ...); + * } * - * The query will have a form like: + * The query will have a form like:: * - * struct entry *latch_query(struct latch_struct *latch, ...) - * { - * struct entry *entry; - * unsigned seq, idx; + * struct entry *latch_query(struct latch_struct *latch, ...) + * { + * struct entry *entry; + * unsigned seq, idx; * - * do { - * seq = raw_read_seqcount_latch(&latch->seq); + * do { + * seq = raw_read_seqcount_latch(&latch->seq); * - * idx = seq & 0x01; - * entry = data_query(latch->data[idx], ...); + * idx = seq & 0x01; + * entry = data_query(latch->data[idx], ...); * - * smp_rmb(); - * } while (seq != latch->seq); + * // read_seqcount_retry() includes needed smp_rmb() + * } while (read_seqcount_retry(&latch->seq, seq)); * - * return entry; - * } + * return entry; + * } * * So during the modification, queries are first redirected to data[1]. Then we * modify data[0]. When that is complete, we redirect queries back to data[0] * and we can modify data[1]. * - * NOTE: The non-requirement for atomic modifications does _NOT_ include - * the publishing of new entries in the case where data is a dynamic - * data structure. + * NOTE: * - * An iteration might start in data[0] and get suspended long enough - * to miss an entire modification sequence, once it resumes it might - * observe the new entry. + * The non-requirement for atomic modifications does _NOT_ include + * the publishing of new entries in the case where data is a dynamic + * data structure. * - * NOTE: When data is a dynamic data structure; one should use regular RCU - * patterns to manage the lifetimes of the objects within. + * An iteration might start in data[0] and get suspended long enough + * to miss an entire modification sequence, once it resumes it might + * observe the new entry. + * + * NOTE: + * + * When data is a dynamic data structure; one should use regular RCU + * patterns to manage the lifetimes of the objects within. */ static inline void raw_write_seqcount_latch(seqcount_t *s) { @@ -399,67 +502,48 @@ static inline void raw_write_seqcount_latch(seqcount_t *s) } /* - * Sequence counter only version assumes that callers are using their - * own mutexing. - */ -static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass) -{ - raw_write_seqcount_begin(s); - seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); -} - -static inline void write_seqcount_begin(seqcount_t *s) -{ - write_seqcount_begin_nested(s, 0); -} - -static inline void write_seqcount_end(seqcount_t *s) -{ - seqcount_release(&s->dep_map, _RET_IP_); - raw_write_seqcount_end(s); -} - -/** - * write_seqcount_invalidate - invalidate in-progress read-side seq operations - * @s: pointer to seqcount_t + * Sequential locks (seqlock_t) * - * After write_seqcount_invalidate, no read-side seq operations will complete - * successfully and see data older than this. + * Sequence counters with an embedded spinlock for writer serialization + * and non-preemptibility. + * + * For more info, see: + * - Comments on top of seqcount_t + * - Documentation/locking/seqlock.rst */ -static inline void write_seqcount_invalidate(seqcount_t *s) -{ - smp_wmb(); - kcsan_nestable_atomic_begin(); - s->sequence+=2; - kcsan_nestable_atomic_end(); -} - typedef struct { struct seqcount seqcount; spinlock_t lock; } seqlock_t; -/* - * These macros triggered gcc-3.x compile-time problems. We think these are - * OK now. Be cautious. - */ #define __SEQLOCK_UNLOCKED(lockname) \ { \ .seqcount = SEQCNT_ZERO(lockname), \ .lock = __SPIN_LOCK_UNLOCKED(lockname) \ } -#define seqlock_init(x) \ +/** + * seqlock_init() - dynamic initializer for seqlock_t + * @sl: Pointer to the seqlock_t instance + */ +#define seqlock_init(sl) \ do { \ - seqcount_init(&(x)->seqcount); \ - spin_lock_init(&(x)->lock); \ + seqcount_init(&(sl)->seqcount); \ + spin_lock_init(&(sl)->lock); \ } while (0) -#define DEFINE_SEQLOCK(x) \ - seqlock_t x = __SEQLOCK_UNLOCKED(x) +/** + * DEFINE_SEQLOCK() - Define a statically allocated seqlock_t + * @sl: Name of the seqlock_t instance + */ +#define DEFINE_SEQLOCK(sl) \ + seqlock_t sl = __SEQLOCK_UNLOCKED(sl) -/* - * Read side functions for starting and finalizing a read side section. +/** + * read_seqbegin() - start a seqlock_t read side critical section + * @sl: Pointer to seqlock_t + * + * Return: count, to be passed to read_seqretry() */ static inline unsigned read_seqbegin(const seqlock_t *sl) { @@ -470,6 +554,17 @@ static inline unsigned read_seqbegin(const seqlock_t *sl) return ret; } +/** + * read_seqretry() - end a seqlock_t read side section + * @sl: Pointer to seqlock_t + * @start: count, from read_seqbegin() + * + * read_seqretry closes the read side critical section of given seqlock_t. + * If the critical section was invalid, it must be ignored (and typically + * retried). + * + * Return: true if a read section retry is required, else false + */ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) { /* @@ -481,41 +576,85 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) return read_seqcount_retry(&sl->seqcount, start); } -/* - * Lock out other writers and update the count. - * Acts like a normal spin_lock/unlock. - * Don't need preempt_disable() because that is in the spin_lock already. +/** + * write_seqlock() - start a seqlock_t write side critical section + * @sl: Pointer to seqlock_t + * + * write_seqlock opens a write side critical section for the given + * seqlock_t. It also implicitly acquires the spinlock_t embedded inside + * that sequential lock. All seqlock_t write side sections are thus + * automatically serialized and non-preemptible. + * + * Context: if the seqlock_t read section, or other write side critical + * sections, can be invoked from hardirq or softirq contexts, use the + * _irqsave or _bh variants of this function instead. */ static inline void write_seqlock(seqlock_t *sl) { spin_lock(&sl->lock); - write_seqcount_begin(&sl->seqcount); + __write_seqcount_begin(&sl->seqcount); } +/** + * write_sequnlock() - end a seqlock_t write side critical section + * @sl: Pointer to seqlock_t + * + * write_sequnlock closes the (serialized and non-preemptible) write side + * critical section of given seqlock_t. + */ static inline void write_sequnlock(seqlock_t *sl) { write_seqcount_end(&sl->seqcount); spin_unlock(&sl->lock); } +/** + * write_seqlock_bh() - start a softirqs-disabled seqlock_t write section + * @sl: Pointer to seqlock_t + * + * _bh variant of write_seqlock(). Use only if the read side section, or + * other write side sections, can be invoked from softirq contexts. + */ static inline void write_seqlock_bh(seqlock_t *sl) { spin_lock_bh(&sl->lock); - write_seqcount_begin(&sl->seqcount); + __write_seqcount_begin(&sl->seqcount); } +/** + * write_sequnlock_bh() - end a softirqs-disabled seqlock_t write section + * @sl: Pointer to seqlock_t + * + * write_sequnlock_bh closes the serialized, non-preemptible, and + * softirqs-disabled, seqlock_t write side critical section opened with + * write_seqlock_bh(). + */ static inline void write_sequnlock_bh(seqlock_t *sl) { write_seqcount_end(&sl->seqcount); spin_unlock_bh(&sl->lock); } +/** + * write_seqlock_irq() - start a non-interruptible seqlock_t write section + * @sl: Pointer to seqlock_t + * + * _irq variant of write_seqlock(). Use only if the read side section, or + * other write sections, can be invoked from hardirq contexts. + */ static inline void write_seqlock_irq(seqlock_t *sl) { spin_lock_irq(&sl->lock); - write_seqcount_begin(&sl->seqcount); + __write_seqcount_begin(&sl->seqcount); } +/** + * write_sequnlock_irq() - end a non-interruptible seqlock_t write section + * @sl: Pointer to seqlock_t + * + * write_sequnlock_irq closes the serialized and non-interruptible + * seqlock_t write side section opened with write_seqlock_irq(). + */ static inline void write_sequnlock_irq(seqlock_t *sl) { write_seqcount_end(&sl->seqcount); @@ -527,13 +666,32 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) unsigned long flags; spin_lock_irqsave(&sl->lock, flags); - write_seqcount_begin(&sl->seqcount); + __write_seqcount_begin(&sl->seqcount); return flags; } +/** + * write_seqlock_irqsave() - start a non-interruptible seqlock_t write + * section + * @lock: Pointer to seqlock_t + * @flags: Stack-allocated storage for saving caller's local interrupt + * state, to be passed to write_sequnlock_irqrestore(). + * + * _irqsave variant of write_seqlock(). Use it only if the read side + * section, or other write sections, can be invoked from hardirq context. + */ #define write_seqlock_irqsave(lock, flags) \ do { flags = __write_seqlock_irqsave(lock); } while (0) +/** + * write_sequnlock_irqrestore() - end non-interruptible seqlock_t write + * section + * @sl: Pointer to seqlock_t + * @flags: Caller's saved interrupt state, from write_seqlock_irqsave() + * + * write_sequnlock_irqrestore closes the serialized and non-interruptible + * seqlock_t write section previously opened with write_seqlock_irqsave(). + */ static inline void write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) { @@ -541,65 +699,79 @@ write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) spin_unlock_irqrestore(&sl->lock, flags); } -/* - * A locking reader exclusively locks out other writers and locking readers, - * but doesn't update the sequence number. Acts like a normal spin_lock/unlock. - * Don't need preempt_disable() because that is in the spin_lock already. +/** + * read_seqlock_excl() - begin a seqlock_t locking reader section + * @sl: Pointer to seqlock_t + * + * read_seqlock_excl opens a seqlock_t locking reader critical section. A + * locking reader exclusively locks out *both* other writers *and* other + * locking readers, but it does not update the embedded sequence number. + * + * Locking readers act like a normal spin_lock()/spin_unlock(). + * + * Context: if the seqlock_t write section, *or other read sections*, can + * be invoked from hardirq or softirq contexts, use the _irqsave or _bh + * variant of this function instead. + * + * The opened read section must be closed with read_sequnlock_excl(). */ static inline void read_seqlock_excl(seqlock_t *sl) { spin_lock(&sl->lock); } +/** + * read_sequnlock_excl() - end a seqlock_t locking reader critical section + * @sl: Pointer to seqlock_t + */ static inline void read_sequnlock_excl(seqlock_t *sl) { spin_unlock(&sl->lock); } /** - * read_seqbegin_or_lock - begin a sequence number check or locking block - * @lock: sequence lock - * @seq : sequence number to be checked + * read_seqlock_excl_bh() - start a seqlock_t locking reader section with + * softirqs disabled + * @sl: Pointer to seqlock_t * - * First try it once optimistically without taking the lock. If that fails, - * take the lock. The sequence number is also used as a marker for deciding - * whether to be a reader (even) or writer (odd). - * N.B. seq must be initialized to an even number to begin with. + * _bh variant of read_seqlock_excl(). Use this variant only if the + * seqlock_t write side section, *or other read sections*, can be invoked + * from softirq contexts. */ -static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq) -{ - if (!(*seq & 1)) /* Even */ - *seq = read_seqbegin(lock); - else /* Odd */ - read_seqlock_excl(lock); -} - -static inline int need_seqretry(seqlock_t *lock, int seq) -{ - return !(seq & 1) && read_seqretry(lock, seq); -} - -static inline void done_seqretry(seqlock_t *lock, int seq) -{ - if (seq & 1) - read_sequnlock_excl(lock); -} - static inline void read_seqlock_excl_bh(seqlock_t *sl) { spin_lock_bh(&sl->lock); } +/** + * read_sequnlock_excl_bh() - stop a seqlock_t softirq-disabled locking + * reader section + * @sl: Pointer to seqlock_t + */ static inline void read_sequnlock_excl_bh(seqlock_t *sl) { spin_unlock_bh(&sl->lock); } +/** + * read_seqlock_excl_irq() - start a non-interruptible seqlock_t locking + * reader section + * @sl: Pointer to seqlock_t + * + * _irq variant of read_seqlock_excl(). Use this only if the seqlock_t + * write side section, *or other read sections*, can be invoked from a + * hardirq context. + */ static inline void read_seqlock_excl_irq(seqlock_t *sl) { spin_lock_irq(&sl->lock); } +/** + * read_sequnlock_excl_irq() - end an interrupts-disabled seqlock_t + * locking reader section + * @sl: Pointer to seqlock_t + */ static inline void read_sequnlock_excl_irq(seqlock_t *sl) { spin_unlock_irq(&sl->lock); @@ -613,15 +785,117 @@ static inline unsigned long __read_seqlock_excl_irqsave(seqlock_t *sl) return flags; } +/** + * read_seqlock_excl_irqsave() - start a non-interruptible seqlock_t + * locking reader section + * @lock: Pointer to seqlock_t + * @flags: Stack-allocated storage for saving caller's local interrupt + * state, to be passed to read_sequnlock_excl_irqrestore(). + * + * _irqsave variant of read_seqlock_excl(). Use this only if the seqlock_t + * write side section, *or other read sections*, can be invoked from a + * hardirq context. + */ #define read_seqlock_excl_irqsave(lock, flags) \ do { flags = __read_seqlock_excl_irqsave(lock); } while (0) +/** + * read_sequnlock_excl_irqrestore() - end non-interruptible seqlock_t + * locking reader section + * @sl: Pointer to seqlock_t + * @flags: Caller saved interrupt state, from read_seqlock_excl_irqsave() + */ static inline void read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags) { spin_unlock_irqrestore(&sl->lock, flags); } +/** + * read_seqbegin_or_lock() - begin a seqlock_t lockless or locking reader + * @lock: Pointer to seqlock_t + * @seq : Marker and return parameter. If the passed value is even, the + * reader will become a *lockless* seqlock_t reader as in read_seqbegin(). + * If the passed value is odd, the reader will become a *locking* reader + * as in read_seqlock_excl(). In the first call to this function, the + * caller *must* initialize and pass an even value to @seq; this way, a + * lockless read can be optimistically tried first. + * + * read_seqbegin_or_lock is an API designed to optimistically try a normal + * lockless seqlock_t read section first. If an odd counter is found, the + * lockless read trial has failed, and the next read iteration transforms + * itself into a full seqlock_t locking reader. + * + * This is typically used to avoid seqlock_t lockless readers starvation + * (too much retry loops) in the case of a sharp spike in write side + * activity. + * + * Context: if the seqlock_t write section, *or other read sections*, can + * be invoked from hardirq or softirq contexts, use the _irqsave or _bh + * variant of this function instead. + * + * Check Documentation/locking/seqlock.rst for template example code. + * + * Return: the encountered sequence counter value, through the @seq + * parameter, which is overloaded as a return parameter. This returned + * value must be checked with need_seqretry(). If the read section need to + * be retried, this returned value must also be passed as the @seq + * parameter of the next read_seqbegin_or_lock() iteration. + */ +static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq) +{ + if (!(*seq & 1)) /* Even */ + *seq = read_seqbegin(lock); + else /* Odd */ + read_seqlock_excl(lock); +} + +/** + * need_seqretry() - validate seqlock_t "locking or lockless" read section + * @lock: Pointer to seqlock_t + * @seq: sequence count, from read_seqbegin_or_lock() + * + * Return: true if a read section retry is required, false otherwise + */ +static inline int need_seqretry(seqlock_t *lock, int seq) +{ + return !(seq & 1) && read_seqretry(lock, seq); +} + +/** + * done_seqretry() - end seqlock_t "locking or lockless" reader section + * @lock: Pointer to seqlock_t + * @seq: count, from read_seqbegin_or_lock() + * + * done_seqretry finishes the seqlock_t read side critical section started + * with read_seqbegin_or_lock() and validated by need_seqretry(). + */ +static inline void done_seqretry(seqlock_t *lock, int seq) +{ + if (seq & 1) + read_sequnlock_excl(lock); +} + +/** + * read_seqbegin_or_lock_irqsave() - begin a seqlock_t lockless reader, or + * a non-interruptible locking reader + * @lock: Pointer to seqlock_t + * @seq: Marker and return parameter. Check read_seqbegin_or_lock(). + * + * This is the _irqsave variant of read_seqbegin_or_lock(). Use it only if + * the seqlock_t write section, *or other read sections*, can be invoked + * from hardirq context. + * + * Note: Interrupts will be disabled only for "locking reader" mode. + * + * Return: + * + * 1. The saved local interrupts state in case of a locking reader, to + * be passed to done_seqretry_irqrestore(). + * + * 2. The encountered sequence counter value, returned through @seq + * overloaded as a return parameter. Check read_seqbegin_or_lock(). + */ static inline unsigned long read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq) { @@ -635,6 +909,18 @@ read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq) return flags; } +/** + * done_seqretry_irqrestore() - end a seqlock_t lockless reader, or a + * non-interruptible locking reader section + * @lock: Pointer to seqlock_t + * @seq: Count, from read_seqbegin_or_lock_irqsave() + * @flags: Caller's saved local interrupt state in case of a locking + * reader, also from read_seqbegin_or_lock_irqsave() + * + * This is the _irqrestore variant of done_seqretry(). The read section + * must've been opened with read_seqbegin_or_lock_irqsave(), and validated + * by need_seqretry(). + */ static inline void done_seqretry_irqrestore(seqlock_t *lock, int seq, unsigned long flags) { diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index d3770b3f9d9a..f2f12d746dbd 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -56,6 +56,7 @@ #include <linux/kernel.h> #include <linux/stringify.h> #include <linux/bottom_half.h> +#include <linux/lockdep.h> #include <asm/barrier.h> #include <asm/mmiowb.h> diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h index 6102e6bff3ae..b981caafe8bf 100644 --- a/include/linux/spinlock_types.h +++ b/include/linux/spinlock_types.h @@ -15,7 +15,7 @@ # include <linux/spinlock_types_up.h> #endif -#include <linux/lockdep.h> +#include <linux/lockdep_types.h> typedef struct raw_spinlock { arch_spinlock_t raw_lock; diff --git a/include/linux/types.h b/include/linux/types.h index d3021c879179..a147977602b5 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -167,6 +167,8 @@ typedef struct { int counter; } atomic_t; +#define ATOMIC_INIT(i) { (i) } + #ifdef CONFIG_64BIT typedef struct { s64 counter; |