From 0d43eb34f9aabcddf41c99b7af2d0ced33e9a3cc Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 6 Aug 2015 14:17:29 -0700 Subject: rcu: Invert passed_quiesce and rename to cpu_no_qs This commit inverts the sense of the rcu_data structure's ->passed_quiesce field and renames it to ->cpu_no_qs. This will allow a later commit to use an "aggregate OR" operation to test expedited as well as normal grace periods without added overhead. Signed-off-by: Paul E. McKenney --- Documentation/RCU/trace.txt | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'Documentation') diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index 97f17e9decda..ec6998b1b6d0 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt @@ -56,14 +56,14 @@ rcuboost: The output of "cat rcu/rcu_preempt/rcudata" looks as follows: - 0!c=30455 g=30456 pq=1/0 qp=1 dt=126535/140000000000000/0 df=2002 of=4 ql=0/0 qs=N... b=10 ci=74572 nci=0 co=1131 ca=716 - 1!c=30719 g=30720 pq=1/0 qp=0 dt=132007/140000000000000/0 df=1874 of=10 ql=0/0 qs=N... b=10 ci=123209 nci=0 co=685 ca=982 - 2!c=30150 g=30151 pq=1/1 qp=1 dt=138537/140000000000000/0 df=1707 of=8 ql=0/0 qs=N... b=10 ci=80132 nci=0 co=1328 ca=1458 - 3 c=31249 g=31250 pq=1/1 qp=0 dt=107255/140000000000000/0 df=1749 of=6 ql=0/450 qs=NRW. b=10 ci=151700 nci=0 co=509 ca=622 - 4!c=29502 g=29503 pq=1/0 qp=1 dt=83647/140000000000000/0 df=965 of=5 ql=0/0 qs=N... b=10 ci=65643 nci=0 co=1373 ca=1521 - 5 c=31201 g=31202 pq=1/0 qp=1 dt=70422/0/0 df=535 of=7 ql=0/0 qs=.... b=10 ci=58500 nci=0 co=764 ca=698 - 6!c=30253 g=30254 pq=1/0 qp=1 dt=95363/140000000000000/0 df=780 of=5 ql=0/0 qs=N... b=10 ci=100607 nci=0 co=1414 ca=1353 - 7 c=31178 g=31178 pq=1/0 qp=0 dt=91536/0/0 df=547 of=4 ql=0/0 qs=.... b=10 ci=109819 nci=0 co=1115 ca=969 + 0!c=30455 g=30456 cnq=1/0:1 dt=126535/140000000000000/0 df=2002 of=4 ql=0/0 qs=N... b=10 ci=74572 nci=0 co=1131 ca=716 + 1!c=30719 g=30720 cnq=1/0:0 dt=132007/140000000000000/0 df=1874 of=10 ql=0/0 qs=N... b=10 ci=123209 nci=0 co=685 ca=982 + 2!c=30150 g=30151 cnq=1/1:1 dt=138537/140000000000000/0 df=1707 of=8 ql=0/0 qs=N... b=10 ci=80132 nci=0 co=1328 ca=1458 + 3 c=31249 g=31250 cnq=1/1:0 dt=107255/140000000000000/0 df=1749 of=6 ql=0/450 qs=NRW. b=10 ci=151700 nci=0 co=509 ca=622 + 4!c=29502 g=29503 cnq=1/0:1 dt=83647/140000000000000/0 df=965 of=5 ql=0/0 qs=N... b=10 ci=65643 nci=0 co=1373 ca=1521 + 5 c=31201 g=31202 cnq=1/0:1 dt=70422/0/0 df=535 of=7 ql=0/0 qs=.... b=10 ci=58500 nci=0 co=764 ca=698 + 6!c=30253 g=30254 cnq=1/0:1 dt=95363/140000000000000/0 df=780 of=5 ql=0/0 qs=N... b=10 ci=100607 nci=0 co=1414 ca=1353 + 7 c=31178 g=31178 cnq=1/0:0 dt=91536/0/0 df=547 of=4 ql=0/0 qs=.... b=10 ci=109819 nci=0 co=1115 ca=969 This file has one line per CPU, or eight for this 8-CPU system. The fields are as follows: @@ -188,14 +188,14 @@ o "ca" is the number of RCU callbacks that have been adopted by this Kernels compiled with CONFIG_RCU_BOOST=y display the following from /debug/rcu/rcu_preempt/rcudata: - 0!c=12865 g=12866 pq=1/0 qp=1 dt=83113/140000000000000/0 df=288 of=11 ql=0/0 qs=N... kt=0/O ktl=944 b=10 ci=60709 nci=0 co=748 ca=871 - 1 c=14407 g=14408 pq=1/0 qp=0 dt=100679/140000000000000/0 df=378 of=7 ql=0/119 qs=NRW. kt=0/W ktl=9b6 b=10 ci=109740 nci=0 co=589 ca=485 - 2 c=14407 g=14408 pq=1/0 qp=0 dt=105486/0/0 df=90 of=9 ql=0/89 qs=NRW. kt=0/W ktl=c0c b=10 ci=83113 nci=0 co=533 ca=490 - 3 c=14407 g=14408 pq=1/0 qp=0 dt=107138/0/0 df=142 of=8 ql=0/188 qs=NRW. kt=0/W ktl=b96 b=10 ci=121114 nci=0 co=426 ca=290 - 4 c=14405 g=14406 pq=1/0 qp=1 dt=50238/0/0 df=706 of=7 ql=0/0 qs=.... kt=0/W ktl=812 b=10 ci=34929 nci=0 co=643 ca=114 - 5!c=14168 g=14169 pq=1/0 qp=0 dt=45465/140000000000000/0 df=161 of=11 ql=0/0 qs=N... kt=0/O ktl=b4d b=10 ci=47712 nci=0 co=677 ca=722 - 6 c=14404 g=14405 pq=1/0 qp=0 dt=59454/0/0 df=94 of=6 ql=0/0 qs=.... kt=0/W ktl=e57 b=10 ci=55597 nci=0 co=701 ca=811 - 7 c=14407 g=14408 pq=1/0 qp=1 dt=68850/0/0 df=31 of=8 ql=0/0 qs=.... kt=0/W ktl=14bd b=10 ci=77475 nci=0 co=508 ca=1042 + 0!c=12865 g=12866 cnq=1/0:1 dt=83113/140000000000000/0 df=288 of=11 ql=0/0 qs=N... kt=0/O ktl=944 b=10 ci=60709 nci=0 co=748 ca=871 + 1 c=14407 g=14408 cnq=1/0:0 dt=100679/140000000000000/0 df=378 of=7 ql=0/119 qs=NRW. kt=0/W ktl=9b6 b=10 ci=109740 nci=0 co=589 ca=485 + 2 c=14407 g=14408 cnq=1/0:0 dt=105486/0/0 df=90 of=9 ql=0/89 qs=NRW. kt=0/W ktl=c0c b=10 ci=83113 nci=0 co=533 ca=490 + 3 c=14407 g=14408 cnq=1/0:0 dt=107138/0/0 df=142 of=8 ql=0/188 qs=NRW. kt=0/W ktl=b96 b=10 ci=121114 nci=0 co=426 ca=290 + 4 c=14405 g=14406 cnq=1/0:1 dt=50238/0/0 df=706 of=7 ql=0/0 qs=.... kt=0/W ktl=812 b=10 ci=34929 nci=0 co=643 ca=114 + 5!c=14168 g=14169 cnq=1/0:0 dt=45465/140000000000000/0 df=161 of=11 ql=0/0 qs=N... kt=0/O ktl=b4d b=10 ci=47712 nci=0 co=677 ca=722 + 6 c=14404 g=14405 cnq=1/0:0 dt=59454/0/0 df=94 of=6 ql=0/0 qs=.... kt=0/W ktl=e57 b=10 ci=55597 nci=0 co=701 ca=811 + 7 c=14407 g=14408 cnq=1/0:1 dt=68850/0/0 df=31 of=8 ql=0/0 qs=.... kt=0/W ktl=14bd b=10 ci=77475 nci=0 co=508 ca=1042 This is similar to the output discussed above, but contains the following additional fields: -- cgit v1.2.3 From ee968ac61d5a3440b931375d81113e0eedfcb249 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 31 Jul 2015 08:28:35 -0700 Subject: rcu: Eliminate panic when silly boot-time fanout specified This commit loosens rcutree.rcu_fanout_leaf range checks and replaces a panic() with a fallback to compile-time values. This fallback is accompanied by a WARN_ON(), and both occur when the rcutree.rcu_fanout_leaf value is too small to accommodate the number of CPUs. For example, given the current four-level limit for the rcu_node tree, a system with more than 16 CPUs built with CONFIG_FANOUT=2 must have rcutree.rcu_fanout_leaf larger than 2. Reported-by: Peter Zijlstra Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- Documentation/kernel-parameters.txt | 9 ++++++--- kernel/rcu/tree.c | 20 +++++++++++--------- 2 files changed, 17 insertions(+), 12 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 22a4b687ea5b..23ec96877311 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3074,9 +3074,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. cache-to-cache transfer latencies. rcutree.rcu_fanout_leaf= [KNL] - Increase the number of CPUs assigned to each - leaf rcu_node structure. Useful for very large - systems. + Change the number of CPUs assigned to each + leaf rcu_node structure. Useful for very + large systems, which will choose the value 64, + and for NUMA systems with large remote-access + latencies, which will choose a value aligned + with the appropriate hardware boundaries. rcutree.jiffies_till_sched_qs= [KNL] Set required age in jiffies for a diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 93c0f23c3e45..713eb92314b4 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4230,13 +4230,12 @@ static void __init rcu_init_geometry(void) rcu_fanout_leaf, nr_cpu_ids); /* - * The boot-time rcu_fanout_leaf parameter is only permitted - * to increase the leaf-level fanout, not decrease it. Of course, - * the leaf-level fanout cannot exceed the number of bits in - * the rcu_node masks. Complain and fall back to the compile- - * time values if these limits are exceeded. + * The boot-time rcu_fanout_leaf parameter must be at least two + * and cannot exceed the number of bits in the rcu_node masks. + * Complain and fall back to the compile-time values if this + * limit is exceeded. */ - if (rcu_fanout_leaf < RCU_FANOUT_LEAF || + if (rcu_fanout_leaf < 2 || rcu_fanout_leaf > sizeof(unsigned long) * 8) { rcu_fanout_leaf = RCU_FANOUT_LEAF; WARN_ON(1); @@ -4253,10 +4252,13 @@ static void __init rcu_init_geometry(void) /* * The tree must be able to accommodate the configured number of CPUs. - * If this limit is exceeded than we have a serious problem elsewhere. + * If this limit is exceeded, fall back to the compile-time values. */ - if (nr_cpu_ids > rcu_capacity[RCU_NUM_LVLS - 1]) - panic("rcu_init_geometry: rcu_capacity[] is too small"); + if (nr_cpu_ids > rcu_capacity[RCU_NUM_LVLS - 1]) { + rcu_fanout_leaf = RCU_FANOUT_LEAF; + WARN_ON(1); + return; + } /* Calculate the number of levels in the tree. */ for (i = 0; nr_cpu_ids > rcu_capacity[i]; i++) { -- cgit v1.2.3 From 27566139b6e2f6cfe273e8bb0e538d7616c2ea00 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 1 Aug 2015 10:45:26 -0700 Subject: documentation: No acquire/release for RCU readers Documentation/memory-barriers.txt calls out RCU as one of the sets of primitives associated with ACQUIRE and RELEASE. There really is an association in that rcu_assign_pointer() includes a RELEASE operation, but a quick read can convince people that rcu_read_lock() and rcu_read_unlock() have ACQUIRE and RELEASE semantics, which they do not. This commit therefore removes RCU from this list in order to avoid this confusion. Reported-by: Boqun Feng Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- Documentation/memory-barriers.txt | 1 - 1 file changed, 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 2ba8461b0631..d336e4d42029 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -1789,7 +1789,6 @@ The Linux kernel has a number of locking constructs: (*) mutexes (*) semaphores (*) R/W semaphores - (*) RCU In all cases there are variants on "ACQUIRE" operations and "RELEASE" operations for each construct. These operations all imply certain barriers: -- cgit v1.2.3 From da873def8da5883a6c04d11f73dcd836c216cf4f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 4 Aug 2015 11:54:04 -0700 Subject: documentation: Call out slow consoles as cause of stall warnings The Linux kernel outputs copious text during boot, and a slow serial console can result in stall warnings, particularly when messages are printed with interrupts disabled. This commit adds this to the list of causes of RCU CPU stall warning messages. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- Documentation/RCU/stallwarn.txt | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'Documentation') diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt index efb9454875ab..0f7fb4298e7e 100644 --- a/Documentation/RCU/stallwarn.txt +++ b/Documentation/RCU/stallwarn.txt @@ -205,6 +205,13 @@ o For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the behavior, you might need to replace some of the cond_resched() calls with calls to cond_resched_rcu_qs(). +o Booting Linux using a console connection that is too slow to + keep up with the boot-time console-message rate. For example, + a 115Kbaud serial console can be -way- too slow to keep up + with boot-time message rates, and will frequently result in + RCU CPU stall warning messages. Especially if you have added + debug printk()s. + o Anything that prevents RCU's grace-period kthreads from running. This can result in the "All QSes seen" console-log message. This message will include information on when the kthread last -- cgit v1.2.3 From 2c4ac34bc2d97f056ed3c43fa03c0737fae46fb6 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 11 Aug 2015 14:26:33 +0200 Subject: documentation: Correct doc to use rcu_dereference_protected As there is lots of misinformation and outdated information on the Internet about nearly all topics related to the kernel, I thought it would be best if I based my RCU code on the guidelines of the examples in the Documentation/ tree of the latest kernel. One thing that stuck out when reading the whatisRCU.txt document was, "interesting how we don't need any function to dereference rcu protected pointers when doing updates if a lock is held. I wonder how static analyzers will work with that." Then, a few weeks later, upon discovering sparse's __rcu support, I ran it over my code, and lo and behold, things weren't done right. Examining other RCU usages in the kernel reveal consistent usage of rcu_dereference_protected, passing in lockdep_is_held as the conditional. So, this patch adds that idiom to the documentation, so that others ahead of me won't endure the same exercise. Signed-off-by: Jason A. Donenfeld Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- Documentation/RCU/whatisRCU.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index adc2184009c5..dc49c6712b17 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -364,7 +364,7 @@ uses of RCU may be found in listRCU.txt, arrayRCU.txt, and NMI-RCU.txt. }; DEFINE_SPINLOCK(foo_mutex); - struct foo *gbl_foo; + struct foo __rcu *gbl_foo; /* * Create a new struct foo that is the same as the one currently @@ -386,7 +386,7 @@ uses of RCU may be found in listRCU.txt, arrayRCU.txt, and NMI-RCU.txt. new_fp = kmalloc(sizeof(*new_fp), GFP_KERNEL); spin_lock(&foo_mutex); - old_fp = gbl_foo; + old_fp = rcu_dereference_protected(gbl_foo, lockdep_is_held(&foo_mutex)); *new_fp = *old_fp; new_fp->a = new_a; rcu_assign_pointer(gbl_foo, new_fp); @@ -487,7 +487,7 @@ The foo_update_a() function might then be written as follows: new_fp = kmalloc(sizeof(*new_fp), GFP_KERNEL); spin_lock(&foo_mutex); - old_fp = gbl_foo; + old_fp = rcu_dereference_protected(gbl_foo, lockdep_is_held(&foo_mutex)); *new_fp = *old_fp; new_fp->a = new_a; rcu_assign_pointer(gbl_foo, new_fp); -- cgit v1.2.3 From b672adf8cfb822781ab904343e5de0297ee117ed Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 24 Aug 2015 11:46:00 -0700 Subject: documentation: Catch up list of torture_type options This commit rids the documentation of long-obsolete torture_type options such as rcu_sync and adds new ones such as tasks. Also add verbiage noting the fact that rcutorture now concurrently tests the asynchrounous, synchronous, expedited synchronous, and polling grace-period primitives. Reported-by: David Miller Signed-off-by: Paul E. McKenney Acked-by: David Miller Reviewed-by: Josh Triplett --- Documentation/RCU/torture.txt | 39 +++++++++++++-------------------------- 1 file changed, 13 insertions(+), 26 deletions(-) (limited to 'Documentation') diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt index dac02a6219b1..118e7c176ce7 100644 --- a/Documentation/RCU/torture.txt +++ b/Documentation/RCU/torture.txt @@ -166,40 +166,27 @@ test_no_idle_hz Whether or not to test the ability of RCU to operate in torture_type The type of RCU to test, with string values as follows: - "rcu": rcu_read_lock(), rcu_read_unlock() and call_rcu(). - - "rcu_sync": rcu_read_lock(), rcu_read_unlock(), and - synchronize_rcu(). - - "rcu_expedited": rcu_read_lock(), rcu_read_unlock(), and - synchronize_rcu_expedited(). + "rcu": rcu_read_lock(), rcu_read_unlock() and call_rcu(), + along with expedited, synchronous, and polling + variants. "rcu_bh": rcu_read_lock_bh(), rcu_read_unlock_bh(), and - call_rcu_bh(). - - "rcu_bh_sync": rcu_read_lock_bh(), rcu_read_unlock_bh(), - and synchronize_rcu_bh(). + call_rcu_bh(), along with expedited and synchronous + variants. - "rcu_bh_expedited": rcu_read_lock_bh(), rcu_read_unlock_bh(), - and synchronize_rcu_bh_expedited(). + "rcu_busted": This tests an intentionally incorrect version + of RCU in order to help test rcutorture itself. "srcu": srcu_read_lock(), srcu_read_unlock() and - call_srcu(). - - "srcu_sync": srcu_read_lock(), srcu_read_unlock() and - synchronize_srcu(). - - "srcu_expedited": srcu_read_lock(), srcu_read_unlock() and - synchronize_srcu_expedited(). + call_srcu(), along with expedited and + synchronous variants. "sched": preempt_disable(), preempt_enable(), and - call_rcu_sched(). - - "sched_sync": preempt_disable(), preempt_enable(), and - synchronize_sched(). + call_rcu_sched(), along with expedited, + synchronous, and polling variants. - "sched_expedited": preempt_disable(), preempt_enable(), and - synchronize_sched_expedited(). + "tasks": voluntary context switch and call_rcu_tasks(), + along with expedited and synchronous variants. Defaults to "rcu". -- cgit v1.2.3 From ad2ad5d31f90fc6fb269e9be244224cecfc8b400 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 17 Sep 2015 08:18:32 -0700 Subject: documentation: Add lockless_dereference() The recently added lockless_dereference() macro is not present in the Documentation/ directory, so this commit fixes that. Reported-by: Dmitry Vyukov Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- Documentation/memory-barriers.txt | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'Documentation') diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index d336e4d42029..8e7cf9ad3db1 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -1710,6 +1710,17 @@ There are some more advanced barrier functions: operations" subsection for information on where to use these. + (*) lockless_dereference(); + This can be thought of as a pointer-fetch wrapper around the + smp_read_barrier_depends() data-dependency barrier. + + This is also similar to rcu_dereference(), but in cases where + object lifetime is handled by some mechanism other than RCU, for + example, when the objects removed only when the system goes down. + In addition, lockless_dereference() is used in some data structures + that can be used both with and without RCU. + + (*) dma_wmb(); (*) dma_rmb(); -- cgit v1.2.3 From 095777c417db142970adeb776fa0cb10810b8122 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 22 Jul 2015 14:07:27 -0700 Subject: locktorture: Support rtmutex torturing Real time mutexes is one of the few general primitives that we do not have in locktorture. Address this -- a few considerations: o To spice things up, enable competing thread(s) to become rt, such that we can stress different prio boosting paths in the rtmutex code. Introduce a ->task_boost callback, only used by rtmutex-torturer. Tasks will boost/deboost around every 50k (arbitrarily) lock/unlock operations. o Hold times are similar to what we have for other locks: only occasionally having longer hold times (per ~200k ops). So we roughly do two full rt boost+deboosting ops with short hold times. Signed-off-by: Davidlohr Bueso Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- Documentation/locking/locktorture.txt | 3 + kernel/locking/locktorture.c | 114 ++++++++++++++++++++- .../selftests/rcutorture/configs/lock/CFLIST | 3 +- .../selftests/rcutorture/configs/lock/LOCK05 | 6 ++ .../selftests/rcutorture/configs/lock/LOCK05.boot | 1 + 5 files changed, 124 insertions(+), 3 deletions(-) create mode 100644 tools/testing/selftests/rcutorture/configs/lock/LOCK05 create mode 100644 tools/testing/selftests/rcutorture/configs/lock/LOCK05.boot (limited to 'Documentation') diff --git a/Documentation/locking/locktorture.txt b/Documentation/locking/locktorture.txt index 619f2bb136a5..a2ef3a929bf1 100644 --- a/Documentation/locking/locktorture.txt +++ b/Documentation/locking/locktorture.txt @@ -52,6 +52,9 @@ torture_type Type of lock to torture. By default, only spinlocks will o "mutex_lock": mutex_lock() and mutex_unlock() pairs. + o "rtmutex_lock": rtmutex_lock() and rtmutex_unlock() + pairs. Kernel must have CONFIG_RT_MUTEX=y. + o "rwsem_lock": read/write down() and up() semaphore pairs. torture_runnable Start locktorture at boot time in the case where the diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 32244186f1f2..e1ca7a2fae91 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -17,12 +17,14 @@ * * Copyright (C) IBM Corporation, 2014 * - * Author: Paul E. McKenney + * Authors: Paul E. McKenney + * Davidlohr Bueso * Based on kernel/rcu/torture.c. */ #include #include #include +#include #include #include #include @@ -91,11 +93,13 @@ struct lock_torture_ops { void (*init)(void); int (*writelock)(void); void (*write_delay)(struct torture_random_state *trsp); + void (*task_boost)(struct torture_random_state *trsp); void (*writeunlock)(void); int (*readlock)(void); void (*read_delay)(struct torture_random_state *trsp); void (*readunlock)(void); - unsigned long flags; + + unsigned long flags; /* for irq spinlocks */ const char *name; }; @@ -139,9 +143,15 @@ static void torture_lock_busted_write_unlock(void) /* BUGGY, do not use in real life!!! */ } +static void torture_boost_dummy(struct torture_random_state *trsp) +{ + /* Only rtmutexes care about priority */ +} + static struct lock_torture_ops lock_busted_ops = { .writelock = torture_lock_busted_write_lock, .write_delay = torture_lock_busted_write_delay, + .task_boost = torture_boost_dummy, .writeunlock = torture_lock_busted_write_unlock, .readlock = NULL, .read_delay = NULL, @@ -185,6 +195,7 @@ static void torture_spin_lock_write_unlock(void) __releases(torture_spinlock) static struct lock_torture_ops spin_lock_ops = { .writelock = torture_spin_lock_write_lock, .write_delay = torture_spin_lock_write_delay, + .task_boost = torture_boost_dummy, .writeunlock = torture_spin_lock_write_unlock, .readlock = NULL, .read_delay = NULL, @@ -211,6 +222,7 @@ __releases(torture_spinlock) static struct lock_torture_ops spin_lock_irq_ops = { .writelock = torture_spin_lock_write_lock_irq, .write_delay = torture_spin_lock_write_delay, + .task_boost = torture_boost_dummy, .writeunlock = torture_lock_spin_write_unlock_irq, .readlock = NULL, .read_delay = NULL, @@ -275,6 +287,7 @@ static void torture_rwlock_read_unlock(void) __releases(torture_rwlock) static struct lock_torture_ops rw_lock_ops = { .writelock = torture_rwlock_write_lock, .write_delay = torture_rwlock_write_delay, + .task_boost = torture_boost_dummy, .writeunlock = torture_rwlock_write_unlock, .readlock = torture_rwlock_read_lock, .read_delay = torture_rwlock_read_delay, @@ -315,6 +328,7 @@ __releases(torture_rwlock) static struct lock_torture_ops rw_lock_irq_ops = { .writelock = torture_rwlock_write_lock_irq, .write_delay = torture_rwlock_write_delay, + .task_boost = torture_boost_dummy, .writeunlock = torture_rwlock_write_unlock_irq, .readlock = torture_rwlock_read_lock_irq, .read_delay = torture_rwlock_read_delay, @@ -354,6 +368,7 @@ static void torture_mutex_unlock(void) __releases(torture_mutex) static struct lock_torture_ops mutex_lock_ops = { .writelock = torture_mutex_lock, .write_delay = torture_mutex_delay, + .task_boost = torture_boost_dummy, .writeunlock = torture_mutex_unlock, .readlock = NULL, .read_delay = NULL, @@ -361,6 +376,90 @@ static struct lock_torture_ops mutex_lock_ops = { .name = "mutex_lock" }; +#ifdef CONFIG_RT_MUTEXES +static DEFINE_RT_MUTEX(torture_rtmutex); + +static int torture_rtmutex_lock(void) __acquires(torture_rtmutex) +{ + rt_mutex_lock(&torture_rtmutex); + return 0; +} + +static void torture_rtmutex_boost(struct torture_random_state *trsp) +{ + int policy; + struct sched_param param; + const unsigned int factor = 50000; /* yes, quite arbitrary */ + + if (!rt_task(current)) { + /* + * (1) Boost priority once every ~50k operations. When the + * task tries to take the lock, the rtmutex it will account + * for the new priority, and do any corresponding pi-dance. + */ + if (!(torture_random(trsp) % + (cxt.nrealwriters_stress * factor))) { + policy = SCHED_FIFO; + param.sched_priority = MAX_RT_PRIO - 1; + } else /* common case, do nothing */ + return; + } else { + /* + * The task will remain boosted for another ~500k operations, + * then restored back to its original prio, and so forth. + * + * When @trsp is nil, we want to force-reset the task for + * stopping the kthread. + */ + if (!trsp || !(torture_random(trsp) % + (cxt.nrealwriters_stress * factor * 2))) { + policy = SCHED_NORMAL; + param.sched_priority = 0; + } else /* common case, do nothing */ + return; + } + + sched_setscheduler_nocheck(current, policy, ¶m); +} + +static void torture_rtmutex_delay(struct torture_random_state *trsp) +{ + const unsigned long shortdelay_us = 2; + const unsigned long longdelay_ms = 100; + + /* + * We want a short delay mostly to emulate likely code, and + * we want a long delay occasionally to force massive contention. + */ + if (!(torture_random(trsp) % + (cxt.nrealwriters_stress * 2000 * longdelay_ms))) + mdelay(longdelay_ms); + if (!(torture_random(trsp) % + (cxt.nrealwriters_stress * 2 * shortdelay_us))) + udelay(shortdelay_us); +#ifdef CONFIG_PREEMPT + if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000))) + preempt_schedule(); /* Allow test to be preempted. */ +#endif +} + +static void torture_rtmutex_unlock(void) __releases(torture_rtmutex) +{ + rt_mutex_unlock(&torture_rtmutex); +} + +static struct lock_torture_ops rtmutex_lock_ops = { + .writelock = torture_rtmutex_lock, + .write_delay = torture_rtmutex_delay, + .task_boost = torture_rtmutex_boost, + .writeunlock = torture_rtmutex_unlock, + .readlock = NULL, + .read_delay = NULL, + .readunlock = NULL, + .name = "rtmutex_lock" +}; +#endif + static DECLARE_RWSEM(torture_rwsem); static int torture_rwsem_down_write(void) __acquires(torture_rwsem) { @@ -419,6 +518,7 @@ static void torture_rwsem_up_read(void) __releases(torture_rwsem) static struct lock_torture_ops rwsem_lock_ops = { .writelock = torture_rwsem_down_write, .write_delay = torture_rwsem_write_delay, + .task_boost = torture_boost_dummy, .writeunlock = torture_rwsem_up_write, .readlock = torture_rwsem_down_read, .read_delay = torture_rwsem_read_delay, @@ -442,6 +542,7 @@ static int lock_torture_writer(void *arg) if ((torture_random(&rand) & 0xfffff) == 0) schedule_timeout_uninterruptible(1); + cxt.cur_ops->task_boost(&rand); cxt.cur_ops->writelock(); if (WARN_ON_ONCE(lock_is_write_held)) lwsp->n_lock_fail++; @@ -456,6 +557,8 @@ static int lock_torture_writer(void *arg) stutter_wait("lock_torture_writer"); } while (!torture_must_stop()); + + cxt.cur_ops->task_boost(NULL); /* reset prio */ torture_kthread_stopping("lock_torture_writer"); return 0; } @@ -642,6 +745,9 @@ static int __init lock_torture_init(void) &spin_lock_ops, &spin_lock_irq_ops, &rw_lock_ops, &rw_lock_irq_ops, &mutex_lock_ops, +#ifdef CONFIG_RT_MUTEXES + &rtmutex_lock_ops, +#endif &rwsem_lock_ops, }; @@ -676,6 +782,10 @@ static int __init lock_torture_init(void) if (strncmp(torture_type, "mutex", 5) == 0) cxt.debug_lock = true; #endif +#ifdef CONFIG_DEBUG_RT_MUTEXES + if (strncmp(torture_type, "rtmutex", 7) == 0) + cxt.debug_lock = true; +#endif #ifdef CONFIG_DEBUG_SPINLOCK if ((strncmp(torture_type, "spin", 4) == 0) || (strncmp(torture_type, "rw_lock", 7) == 0)) diff --git a/tools/testing/selftests/rcutorture/configs/lock/CFLIST b/tools/testing/selftests/rcutorture/configs/lock/CFLIST index 6910b7370761..6ed32794eaa1 100644 --- a/tools/testing/selftests/rcutorture/configs/lock/CFLIST +++ b/tools/testing/selftests/rcutorture/configs/lock/CFLIST @@ -1,4 +1,5 @@ LOCK01 LOCK02 LOCK03 -LOCK04 \ No newline at end of file +LOCK04 +LOCK05 diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK05 b/tools/testing/selftests/rcutorture/configs/lock/LOCK05 new file mode 100644 index 000000000000..1d1da1477fc3 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK05 @@ -0,0 +1,6 @@ +CONFIG_SMP=y +CONFIG_NR_CPUS=4 +CONFIG_HOTPLUG_CPU=y +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK05.boot b/tools/testing/selftests/rcutorture/configs/lock/LOCK05.boot new file mode 100644 index 000000000000..8ac37307c987 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK05.boot @@ -0,0 +1 @@ +locktorture.torture_type=rtmutex_lock -- cgit v1.2.3