diff options
60 files changed, 774 insertions, 621 deletions
diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl index fe397f90a34f..6c9d9d37c83a 100644 --- a/Documentation/DocBook/device-drivers.tmpl +++ b/Documentation/DocBook/device-drivers.tmpl @@ -87,7 +87,10 @@ X!Iinclude/linux/kobject.h !Ekernel/printk/printk.c !Ekernel/panic.c !Ekernel/sys.c -!Ekernel/rcupdate.c +!Ekernel/rcu/srcu.c +!Ekernel/rcu/tree.c +!Ekernel/rcu/tree_plugin.h +!Ekernel/rcu/update.c </sect1> <sect1><title>Device Resource Management</title> diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt index 7703ec73a9bb..91266193b8f4 100644 --- a/Documentation/RCU/checklist.txt +++ b/Documentation/RCU/checklist.txt @@ -202,8 +202,8 @@ over a rather long period of time, but improvements are always welcome! updater uses call_rcu_sched() or synchronize_sched(), then the corresponding readers must disable preemption, possibly by calling rcu_read_lock_sched() and rcu_read_unlock_sched(). - If the updater uses synchronize_srcu() or call_srcu(), - the the corresponding readers must use srcu_read_lock() and + If the updater uses synchronize_srcu() or call_srcu(), then + the corresponding readers must use srcu_read_lock() and srcu_read_unlock(), and with the same srcu_struct. The rules for the expedited primitives are the same as for their non-expedited counterparts. Mixing things up will result in confusion and diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt index 8e9359de1d28..6f3a0057548e 100644 --- a/Documentation/RCU/stallwarn.txt +++ b/Documentation/RCU/stallwarn.txt @@ -12,12 +12,12 @@ CONFIG_RCU_CPU_STALL_TIMEOUT This kernel configuration parameter defines the period of time that RCU will wait from the beginning of a grace period until it issues an RCU CPU stall warning. This time period is normally - sixty seconds. + 21 seconds. This configuration parameter may be changed at runtime via the /sys/module/rcutree/parameters/rcu_cpu_stall_timeout, however this parameter is checked only at the beginning of a cycle. - So if you are 30 seconds into a 70-second stall, setting this + So if you are 10 seconds into a 40-second stall, setting this sysfs parameter to (say) five will shorten the timeout for the -next- stall, or the following warning for the current stall (assuming the stall lasts long enough). It will not affect the @@ -32,7 +32,7 @@ CONFIG_RCU_CPU_STALL_VERBOSE also dump the stacks of any tasks that are blocking the current RCU-preempt grace period. -RCU_CPU_STALL_INFO +CONFIG_RCU_CPU_STALL_INFO This kernel configuration parameter causes the stall warning to print out additional per-CPU diagnostic information, including @@ -43,7 +43,8 @@ RCU_STALL_DELAY_DELTA Although the lockdep facility is extremely useful, it does add some overhead. Therefore, under CONFIG_PROVE_RCU, the RCU_STALL_DELAY_DELTA macro allows five extra seconds before - giving an RCU CPU stall warning message. + giving an RCU CPU stall warning message. (This is a cpp + macro, not a kernel configuration parameter.) RCU_STALL_RAT_DELAY @@ -52,7 +53,8 @@ RCU_STALL_RAT_DELAY However, if the offending CPU does not detect its own stall in the number of jiffies specified by RCU_STALL_RAT_DELAY, then some other CPU will complain. This delay is normally set to - two jiffies. + two jiffies. (This is a cpp macro, not a kernel configuration + parameter.) When a CPU detects that it is stalling, it will print a message similar to the following: @@ -86,7 +88,12 @@ printing, there will be a spurious stall-warning message: INFO: rcu_bh_state detected stalls on CPUs/tasks: { } (detected by 4, 2502 jiffies) -This is rare, but does happen from time to time in real life. +This is rare, but does happen from time to time in real life. It is also +possible for a zero-jiffy stall to be flagged in this case, depending +on how the stall warning and the grace-period initialization happen to +interact. Please note that it is not possible to entirely eliminate this +sort of false positive without resorting to things like stop_machine(), +which is overkill for this sort of problem. If the CONFIG_RCU_CPU_STALL_INFO kernel configuration parameter is set, more information is printed with the stall-warning message, for example: @@ -216,4 +223,5 @@ that portion of the stack which remains the same from trace to trace. If you can reliably trigger the stall, ftrace can be quite helpful. RCU bugs can often be debugged with the help of CONFIG_RCU_TRACE -and with RCU's event tracing. +and with RCU's event tracing. For information on RCU's event tracing, +see include/trace/events/rcu.h. diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index fcbb736d55fe..203f4a9d9efe 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2599,7 +2599,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. ramdisk_size= [RAM] Sizes of RAM disks in kilobytes See Documentation/blockdev/ramdisk.txt. - rcu_nocbs= [KNL,BOOT] + rcu_nocbs= [KNL] In kernels built with CONFIG_RCU_NOCB_CPU=y, set the specified list of CPUs to be no-callback CPUs. Invocation of these CPUs' RCU callbacks will @@ -2612,7 +2612,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. real-time workloads. It can also improve energy efficiency for asymmetric multiprocessors. - rcu_nocb_poll [KNL,BOOT] + rcu_nocb_poll [KNL] Rather than requiring that offloaded CPUs (specified by rcu_nocbs= above) explicitly awaken the corresponding "rcuoN" kthreads, @@ -2623,126 +2623,145 @@ bytes respectively. Such letter suffixes can also be entirely omitted. energy efficiency by requiring that the kthreads periodically wake up to do the polling. - rcutree.blimit= [KNL,BOOT] + rcutree.blimit= [KNL] Set maximum number of finished RCU callbacks to process in one batch. - rcutree.fanout_leaf= [KNL,BOOT] + rcutree.rcu_fanout_leaf= [KNL] Increase the number of CPUs assigned to each leaf rcu_node structure. Useful for very large systems. - rcutree.jiffies_till_first_fqs= [KNL,BOOT] + rcutree.jiffies_till_first_fqs= [KNL] Set delay from grace-period initialization to first attempt to force quiescent states. Units are jiffies, minimum value is zero, and maximum value is HZ. - rcutree.jiffies_till_next_fqs= [KNL,BOOT] + rcutree.jiffies_till_next_fqs= [KNL] Set delay between subsequent attempts to force quiescent states. Units are jiffies, minimum value is one, and maximum value is HZ. - rcutree.qhimark= [KNL,BOOT] + rcutree.qhimark= [KNL] Set threshold of queued RCU callbacks over which batch limiting is disabled. - rcutree.qlowmark= [KNL,BOOT] + rcutree.qlowmark= [KNL] Set threshold of queued RCU callbacks below which batch limiting is re-enabled. - rcutree.rcu_cpu_stall_suppress= [KNL,BOOT] - Suppress RCU CPU stall warning messages. - - rcutree.rcu_cpu_stall_timeout= [KNL,BOOT] - Set timeout for RCU CPU stall warning messages. - - rcutree.rcu_idle_gp_delay= [KNL,BOOT] + rcutree.rcu_idle_gp_delay= [KNL] Set wakeup interval for idle CPUs that have RCU callbacks (RCU_FAST_NO_HZ=y). - rcutree.rcu_idle_lazy_gp_delay= [KNL,BOOT] + rcutree.rcu_idle_lazy_gp_delay= [KNL] Set wakeup interval for idle CPUs that have only "lazy" RCU callbacks (RCU_FAST_NO_HZ=y). Lazy RCU callbacks are those which RCU can prove do nothing more than free memory. - rcutorture.fqs_duration= [KNL,BOOT] + rcutorture.fqs_duration= [KNL] Set duration of force_quiescent_state bursts. - rcutorture.fqs_holdoff= [KNL,BOOT] + rcutorture.fqs_holdoff= [KNL] Set holdoff time within force_quiescent_state bursts. - rcutorture.fqs_stutter= [KNL,BOOT] + rcutorture.fqs_stutter= [KNL] Set wait time between force_quiescent_state bursts. - rcutorture.irqreader= [KNL,BOOT] - Test RCU readers from irq handlers. + rcutorture.gp_exp= [KNL] + Use expedited update-side primitives. + + rcutorture.gp_normal= [KNL] + Use normal (non-expedited) update-side primitives. + If both gp_exp and gp_normal are set, do both. + If neither gp_exp nor gp_normal are set, still + do both. - rcutorture.n_barrier_cbs= [KNL,BOOT] + rcutorture.n_barrier_cbs= [KNL] Set callbacks/threads for rcu_barrier() testing. - rcutorture.nfakewriters= [KNL,BOOT] + rcutorture.nfakewriters= [KNL] Set number of concurrent RCU writers. These just stress RCU, they don't participate in the actual test, hence the "fake". - rcutorture.nreaders= [KNL,BOOT] + rcutorture.nreaders= [KNL] Set number of RCU readers. - rcutorture.onoff_holdoff= [KNL,BOOT] + rcutorture.object_debug= [KNL] + Enable debug-object double-call_rcu() testing. + + rcutorture.onoff_holdoff= [KNL] Set time (s) after boot for CPU-hotplug testing. - rcutorture.onoff_interval= [KNL,BOOT] + rcutorture.onoff_interval= [KNL] Set time (s) between CPU-hotplug operations, or zero to disable CPU-hotplug testing. - rcutorture.shuffle_interval= [KNL,BOOT] + rcutorture.rcutorture_runnable= [BOOT] + Start rcutorture running at boot time. + + rcutorture.shuffle_interval= [KNL] Set task-shuffle interval (s). Shuffling tasks allows some CPUs to go into dyntick-idle mode during the rcutorture test. - rcutorture.shutdown_secs= [KNL,BOOT] + rcutorture.shutdown_secs= [KNL] Set time (s) after boot system shutdown. This is useful for hands-off automated testing. - rcutorture.stall_cpu= [KNL,BOOT] + rcutorture.stall_cpu= [KNL] Duration of CPU stall (s) to test RCU CPU stall warnings, zero to disable. - rcutorture.stall_cpu_holdoff= [KNL,BOOT] + rcutorture.stall_cpu_holdoff= [KNL] Time to wait (s) after boot before inducing stall. - rcutorture.stat_interval= [KNL,BOOT] + rcutorture.stat_interval= [KNL] Time (s) between statistics printk()s. - rcutorture.stutter= [KNL,BOOT] + rcutorture.stutter= [KNL] Time (s) to stutter testing, for example, specifying five seconds causes the test to run for five seconds, wait for five seconds, and so on. This tests RCU's ability to transition abruptly to and from idle. - rcutorture.test_boost= [KNL,BOOT] + rcutorture.test_boost= [KNL] Test RCU priority boosting? 0=no, 1=maybe, 2=yes. "Maybe" means test if the RCU implementation under test support RCU priority boosting. - rcutorture.test_boost_duration= [KNL,BOOT] + rcutorture.test_boost_duration= [KNL] Duration (s) of each individual boost test. - rcutorture.test_boost_interval= [KNL,BOOT] + rcutorture.test_boost_interval= [KNL] Interval (s) between each boost test. - rcutorture.test_no_idle_hz= [KNL,BOOT] + rcutorture.test_no_idle_hz= [KNL] Test RCU's dyntick-idle handling. See also the rcutorture.shuffle_interval parameter. - rcutorture.torture_type= [KNL,BOOT] + rcutorture.torture_type= [KNL] Specify the RCU implementation to test. - rcutorture.verbose= [KNL,BOOT] + rcutorture.verbose= [KNL] Enable additional printk() statements. + rcupdate.rcu_expedited= [KNL] + Use expedited grace-period primitives, for + example, synchronize_rcu_expedited() instead + of synchronize_rcu(). This reduces latency, + but can increase CPU utilization, degrade + real-time latency, and degrade energy efficiency. + + rcupdate.rcu_cpu_stall_suppress= [KNL] + Suppress RCU CPU stall warning messages. + + rcupdate.rcu_cpu_stall_timeout= [KNL] + Set timeout for RCU CPU stall warning messages. + rdinit= [KNL] Format: <full_path> Run specified binary instead of /init from the ramdisk, diff --git a/Documentation/kernel-per-CPU-kthreads.txt b/Documentation/kernel-per-CPU-kthreads.txt index 32351bfabf20..827104fb9364 100644 --- a/Documentation/kernel-per-CPU-kthreads.txt +++ b/Documentation/kernel-per-CPU-kthreads.txt @@ -181,12 +181,17 @@ To reduce its OS jitter, do any of the following: make sure that this is safe on your particular system. d. It is not possible to entirely get rid of OS jitter from vmstat_update() on CONFIG_SMP=y systems, but you - can decrease its frequency by writing a large value to - /proc/sys/vm/stat_interval. The default value is HZ, - for an interval of one second. Of course, larger values - will make your virtual-memory statistics update more - slowly. Of course, you can also run your workload at - a real-time priority, thus preempting vmstat_update(). + can decrease its frequency by writing a large value + to /proc/sys/vm/stat_interval. The default value is + HZ, for an interval of one second. Of course, larger + values will make your virtual-memory statistics update + more slowly. Of course, you can also run your workload + at a real-time priority, thus preempting vmstat_update(), + but if your workload is CPU-bound, this is a bad idea. + However, there is an RFC patch from Christoph Lameter + (based on an earlier one from Gilad Ben-Yossef) that + reduces or even eliminates vmstat overhead for some + workloads at https://lkml.org/lkml/2013/9/4/379. e. If running on high-end powerpc servers, build with CONFIG_PPC_RTAS_DAEMON=n. This prevents the RTAS daemon from running on each CPU every second or so. diff --git a/Documentation/lockstat.txt b/Documentation/lockstat.txt index dd2f7b26ca30..72d010689751 100644 --- a/Documentation/lockstat.txt +++ b/Documentation/lockstat.txt @@ -46,16 +46,14 @@ With these hooks we provide the following statistics: contentions - number of lock acquisitions that had to wait wait time min - shortest (non-0) time we ever had to wait for a lock max - longest time we ever had to wait for a lock - total - total time we spend waiting on this lock + total - total time we spend waiting on this lock + avg - average time spent waiting on this lock acq-bounces - number of lock acquisitions that involved x-cpu data acquisitions - number of times we took the lock hold time min - shortest (non-0) time we ever held the lock - max - longest time we ever held the lock - total - total time this lock was held - -From these number various other statistics can be derived, such as: - - hold time average = hold time total / acquisitions + max - longest time we ever held the lock + total - total time this lock was held + avg - average time this lock was held These numbers are gathered per lock class, per read/write state (when applicable). @@ -84,37 +82,38 @@ Look at the current lock statistics: # less /proc/lock_stat -01 lock_stat version 0.3 -02 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -03 class name con-bounces contentions waittime-min waittime-max waittime-total acq-bounces acquisitions holdtime-min holdtime-max holdtime-total -04 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +01 lock_stat version 0.4 +02----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +03 class name con-bounces contentions waittime-min waittime-max waittime-total waittime-avg acq-bounces acquisitions holdtime-min holdtime-max holdtime-total holdtime-avg +04----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 05 -06 &mm->mmap_sem-W: 233 538 18446744073708 22924.27 607243.51 1342 45806 1.71 8595.89 1180582.34 -07 &mm->mmap_sem-R: 205 587 18446744073708 28403.36 731975.00 1940 412426 0.58 187825.45 6307502.88 -08 --------------- -09 &mm->mmap_sem 487 [<ffffffff8053491f>] do_page_fault+0x466/0x928 -10 &mm->mmap_sem 179 [<ffffffff802a6200>] sys_mprotect+0xcd/0x21d -11 &mm->mmap_sem 279 [<ffffffff80210a57>] sys_mmap+0x75/0xce -12 &mm->mmap_sem 76 [<ffffffff802a490b>] sys_munmap+0x32/0x59 -13 --------------- -14 &mm->mmap_sem 270 [<ffffffff80210a57>] sys_mmap+0x75/0xce -15 &mm->mmap_sem 431 [<ffffffff8053491f>] do_page_fault+0x466/0x928 -16 &mm->mmap_sem 138 [<ffffffff802a490b>] sys_munmap+0x32/0x59 -17 &mm->mmap_sem 145 [<ffffffff802a6200>] sys_mprotect+0xcd/0x21d +06 &mm->mmap_sem-W: 46 84 0.26 939.10 16371.53 194.90 47291 2922365 0.16 2220301.69 17464026916.32 5975.99 +07 &mm->mmap_sem-R: 37 100 1.31 299502.61 325629.52 3256.30 212344 34316685 0.10 7744.91 95016910.20 2.77 +08 --------------- +09 &mm->mmap_sem 1 [<ffffffff811502a7>] khugepaged_scan_mm_slot+0x57/0x280 +19 &mm->mmap_sem 96 [<ffffffff815351c4>] __do_page_fault+0x1d4/0x510 +11 &mm->mmap_sem 34 [<ffffffff81113d77>] vm_mmap_pgoff+0x87/0xd0 +12 &mm->mmap_sem 17 [<ffffffff81127e71>] vm_munmap+0x41/0x80 +13 --------------- +14 &mm->mmap_sem 1 [<ffffffff81046fda>] dup_mmap+0x2a/0x3f0 +15 &mm->mmap_sem 60 [<ffffffff81129e29>] SyS_mprotect+0xe9/0x250 +16 &mm->mmap_sem 41 [<ffffffff815351c4>] __do_page_fault+0x1d4/0x510 +17 &mm->mmap_sem 68 [<ffffffff81113d77>] vm_mmap_pgoff+0x87/0xd0 18 -19 ............................................................................................................................................................................................... +19............................................................................................................................................................................................................................. 20 -21 dcache_lock: 621 623 0.52 118.26 1053.02 6745 91930 0.29 316.29 118423.41 -22 ----------- -23 dcache_lock 179 [<ffffffff80378274>] _atomic_dec_and_lock+0x34/0x54 -24 dcache_lock 113 [<ffffffff802cc17b>] d_alloc+0x19a/0x1eb -25 dcache_lock 99 [<ffffffff802ca0dc>] d_rehash+0x1b/0x44 -26 dcache_lock 104 [<ffffffff802cbca0>] d_instantiate+0x36/0x8a -27 ----------- -28 dcache_lock 192 [<ffffffff80378274>] _atomic_dec_and_lock+0x34/0x54 -29 dcache_lock 98 [<ffffffff802ca0dc>] d_rehash+0x1b/0x44 -30 dcache_lock 72 [<ffffffff802cc17b>] d_alloc+0x19a/0x1eb -31 dcache_lock 112 [<ffffffff802cbca0>] d_instantiate+0x36/0x8a +21 unix_table_lock: 110 112 0.21 49.24 163.91 1.46 21094 66312 0.12 624.42 31589.81 0.48 +22 --------------- +23 unix_table_lock 45 [<ffffffff8150ad8e>] unix_create1+0x16e/0x1b0 +24 unix_table_lock 47 [<ffffffff8150b111>] unix_release_sock+0x31/0x250 +25 unix_table_lock 15 [<ffffffff8150ca37>] unix_find_other+0x117/0x230 +26 unix_table_lock 5 [<ffffffff8150a09f>] unix_autobind+0x11f/0x1b0 +27 --------------- +28 unix_table_lock 39 [<ffffffff8150b111>] unix_release_sock+0x31/0x250 +29 unix_table_lock 49 [<ffffffff8150ad8e>] unix_create1+0x16e/0x1b0 +30 unix_table_lock 20 [<ffffffff8150ca37>] unix_find_other+0x117/0x230 +31 unix_table_lock 4 [<ffffffff8150a09f>] unix_autobind+0x11f/0x1b0 + This excerpt shows the first two lock class statistics. Line 01 shows the output version - each time the format changes this will be updated. Line 02-04 @@ -131,30 +130,30 @@ The integer part of the time values is in us. Dealing with nested locks, subclasses may appear: -32............................................................................................................................................................................................... +32........................................................................................................................................................................................................................... 33 -34 &rq->lock: 13128 13128 0.43 190.53 103881.26 97454 3453404 0.00 401.11 13224683.11 +34 &rq->lock: 13128 13128 0.43 190.53 103881.26 7.91 97454 3453404 0.00 401.11 13224683.11 3.82 35 --------- -36 &rq->lock 645 [<ffffffff8103bfc4>] task_rq_lock+0x43/0x75 -37 &rq->lock 297 [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a -38 &rq->lock 360 [<ffffffff8103c4c5>] select_task_rq_fair+0x1f0/0x74a -39 &rq->lock 428 [<ffffffff81045f98>] scheduler_tick+0x46/0x1fb +36 &rq->lock 645 [<ffffffff8103bfc4>] task_rq_lock+0x43/0x75 +37 &rq->lock 297 [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a +38 &rq->lock 360 [<ffffffff8103c4c5>] select_task_rq_fair+0x1f0/0x74a +39 &rq->lock 428 [<ffffffff81045f98>] scheduler_tick+0x46/0x1fb 40 --------- -41 &rq->lock 77 [<ffffffff8103bfc4>] task_rq_lock+0x43/0x75 -42 &rq->lock 174 [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a -43 &rq->lock 4715 [<ffffffff8103ed4b>] double_rq_lock+0x42/0x54 -44 &rq->lock 893 [<ffffffff81340524>] schedule+0x157/0x7b8 +41 &rq->lock 77 [<ffffffff8103bfc4>] task_rq_lock+0x43/0x75 +42 &rq->lock 174 [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a +43 &rq->lock 4715 [<ffffffff8103ed4b>] double_rq_lock+0x42/0x54 +44 &rq->lock 893 [<ffffffff81340524>] schedule+0x157/0x7b8 45 -46............................................................................................................................................................................................... +46........................................................................................................................................................................................................................... 47 -48 &rq->lock/1: 11526 11488 0.33 388.73 136294.31 21461 38404 0.00 37.93 109388.53 +48 &rq->lock/1: 1526 11488 0.33 388.73 136294.31 11.86 21461 38404 0.00 37.93 109388.53 2.84 49 ----------- -50 &rq->lock/1 11526 [<ffffffff8103ed58>] double_rq_lock+0x4f/0x54 +50 &rq->lock/1 11526 [<ffffffff8103ed58>] double_rq_lock+0x4f/0x54 51 ----------- -52 &rq->lock/1 5645 [<ffffffff8103ed4b>] double_rq_lock+0x42/0x54 -53 &rq->lock/1 1224 [<ffffffff81340524>] schedule+0x157/0x7b8 -54 &rq->lock/1 4336 [<ffffffff8103ed58>] double_rq_lock+0x4f/0x54 -55 &rq->lock/1 181 [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a +52 &rq->lock/1 5645 [<ffffffff8103ed4b>] double_rq_lock+0x42/0x54 +53 &rq->lock/1 1224 [<ffffffff81340524>] schedule+0x157/0x7b8 +54 &rq->lock/1 4336 [<ffffffff8103ed58>] double_rq_lock+0x4f/0x54 +55 &rq->lock/1 181 [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a Line 48 shows statistics for the second subclass (/1) of &rq->lock class (subclass starts from 0), since in this case, as line 50 suggests, @@ -163,16 +162,16 @@ double_rq_lock actually acquires a nested lock of two spinlocks. View the top contending locks: # grep : /proc/lock_stat | head - &inode->i_data.tree_lock-W: 15 21657 0.18 1093295.30 11547131054.85 58 10415 0.16 87.51 6387.60 - &inode->i_data.tree_lock-R: 0 0 0.00 0.00 0.00 23302 231198 0.25 8.45 98023.38 - dcache_lock: 1037 1161 0.38 45.32 774.51 6611 243371 0.15 306.48 77387.24 - &inode->i_mutex: 161 286 18446744073709 62882.54 1244614.55 3653 20598 18446744073709 62318.60 1693822.74 - &zone->lru_lock: 94 94 0.53 7.33 92.10 4366 32690 0.29 59.81 16350.06 - &inode->i_data.i_mmap_mutex: 79 79 0.40 3.77 53.03 11779 87755 0.28 116.93 29898.44 - &q->__queue_lock: 48 50 0.52 31.62 86.31 774 13131 0.17 113.08 12277.52 - &rq->rq_lock_key: 43 47 0.74 68.50 170.63 3706 33929 0.22 107.99 17460.62 - &rq->rq_lock_key#2: 39 46 0.75 6.68 49.03 2979 32292 0.17 125.17 17137.63 - tasklist_lock-W: 15 15 1.45 10.87 32.70 1201 7390 0.58 62.55 13648.47 + clockevents_lock: 2926159 2947636 0.15 46882.81 1784540466.34 605.41 3381345 3879161 0.00 2260.97 53178395.68 13.71 + tick_broadcast_lock: 346460 346717 0.18 2257.43 39364622.71 113.54 3642919 4242696 0.00 2263.79 49173646.60 11.59 + &mapping->i_mmap_mutex: 203896 203899 3.36 645530.05 31767507988.39 155800.21 3361776 8893984 0.17 2254.15 14110121.02 1.59 + &rq->lock: 135014 136909 0.18 606.09 842160.68 6.15 1540728 10436146 0.00 728.72 17606683.41 1.69 + &(&zone->lru_lock)->rlock: 93000 94934 0.16 59.18 188253.78 1.98 1199912 3809894 0.15 391.40 3559518.81 0.93 + tasklist_lock-W: 40667 41130 0.23 1189.42 428980.51 10.43 270278 510106 0.16 653.51 3939674.91 7.72 + tasklist_lock-R: 21298 21305 0.20 1310.05 215511.12 10.12 186204 241258 0.14 1162.33 1179779.23 4.89 + rcu_node_1: 47656 49022 0.16 635.41 193616.41 3.95 844888 1865423 0.00 764.26 1656226.96 0.89 + &(&dentry->d_lockref.lock)->rlock: 39791 40179 0.15 1302.08 88851.96 2.21 2790851 12527025 0.10 1910.75 3379714.27 0.27 + rcu_node_0: 29203 30064 0.16 786.55 1555573.00 51.74 88963 244254 0.00 398.87 428872.51 1.76 Clear the statistics: diff --git a/MAINTAINERS b/MAINTAINERS index dcd69cb34806..ddabf7042b65 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6950,7 +6950,7 @@ M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git F: Documentation/RCU/torture.txt -F: kernel/rcutorture.c +F: kernel/rcu/torture.c RDC R-321X SoC M: Florian Fainelli <florian@openwrt.org> @@ -6977,8 +6977,9 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git F: Documentation/RCU/ X: Documentation/RCU/torture.txt F: include/linux/rcu* -F: kernel/rcu* -X: kernel/rcutorture.c +X: include/linux/srcu.h +F: kernel/rcu/ +X: kernel/rcu/torture.c REAL TIME CLOCK (RTC) SUBSYSTEM M: Alessandro Zummo <a.zummo@towertech.it> @@ -7667,8 +7668,8 @@ M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> W: http://www.rdrop.com/users/paulmck/RCU/ S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git -F: include/linux/srcu* -F: kernel/srcu* +F: include/linux/srcu.h +F: kernel/rcu/srcu.c SMACK SECURITY MODULE M: Casey Schaufler <casey@schaufler-ca.com> @@ -8919,61 +8920,14 @@ W: http://pegasus2.sourceforge.net/ S: Maintained F: drivers/net/usb/rtl8150.c -USB SERIAL BELKIN F5U103 DRIVER -M: William Greathouse <wgreathouse@smva.com> +USB SERIAL SUBSYSTEM +M: Johan Hovold <jhovold@gmail.com> L: linux-usb@vger.kernel.org S: Maintained -F: drivers/usb/serial/belkin_sa.* - -USB SERIAL CYPRESS M8 DRIVER -M: Lonnie Mendez <dignome@gmail.com> -L: linux-usb@vger.kernel.org -S: Maintained -W: http://geocities.com/i0xox0i -W: http://firstlight.net/cvs -F: drivers/usb/serial/cypress_m8.* - -USB SERIAL CYBERJACK DRIVER -M: Matthias Bruestle and Harald Welte <support@reiner-sct.com> -W: http://www.reiner-sct.de/support/treiber_cyberjack.php -S: Maintained -F: drivers/usb/serial/cyberjack.c - -USB SERIAL DIGI ACCELEPORT DRIVER -M: Peter Berger <pberger@brimson.com> -M: Al Borchers <alborchers@steinerpoint.com> -L: linux-usb@vger.kernel.org -S: Maintained -F: drivers/usb/serial/digi_acceleport.c - -USB SERIAL DRIVER -M: Greg Kroah-Hartman <gregkh@linuxfoundation.org> -L: linux-usb@vger.kernel.org -S: Supported F: Documentation/usb/usb-serial.txt -F: drivers/usb/serial/generic.c -F: drivers/usb/serial/usb-serial.c +F: drivers/usb/serial/ F: include/linux/usb/serial.h -USB SERIAL EMPEG EMPEG-CAR MARK I/II DRIVER -M: Gary Brubaker <xavyer@ix.netcom.com> -L: linux-usb@vger.kernel.org -S: Maintained -F: drivers/usb/serial/empeg.c - -USB SERIAL KEYSPAN DRIVER -M: Greg Kroah-Hartman <gregkh@linuxfoundation.org> -L: linux-usb@vger.kernel.org -S: Maintained -F: drivers/usb/serial/*keyspan* - -USB SERIAL WHITEHEAT DRIVER -M: Support Department <support@connecttech.com> -L: linux-usb@vger.kernel.org -W: http://www.connecttech.com -S: Supported -F: drivers/usb/serial/whiteheat* - USB SMSC75XX ETHERNET DRIVER M: Steve Glendinning <steve.glendinning@shawell.net> L: netdev@vger.kernel.org @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 12 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = NAME = One Giant Leap for Frogkind # *DOCUMENTATION* diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index d63f3de0cd5b..0c14d8a52683 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -17,7 +17,7 @@ #include <asm/pgalloc.h> #include <asm/mmu.h> -static int handle_vmalloc_fault(struct mm_struct *mm, unsigned long address) +static int handle_vmalloc_fault(unsigned long address) { /* * Synchronize this task's top level page-table @@ -27,7 +27,7 @@ static int handle_vmalloc_fault(struct mm_struct *mm, unsigned long address) pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; - pgd = pgd_offset_fast(mm, address); + pgd = pgd_offset_fast(current->active_mm, address); pgd_k = pgd_offset_k(address); if (!pgd_present(*pgd_k)) @@ -72,7 +72,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address) * nothing more. */ if (address >= VMALLOC_START && address <= VMALLOC_END) { - ret = handle_vmalloc_fault(mm, address); + ret = handle_vmalloc_fault(address); if (unlikely(ret)) goto bad_area_nosemaphore; else diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c index 45f1ffcf1a4b..24cdf64789c3 100644 --- a/arch/mips/kernel/perf_event_mipsxx.c +++ b/arch/mips/kernel/perf_event_mipsxx.c @@ -971,11 +971,11 @@ static const struct mips_perf_event mipsxx74Kcore_cache_map [C(LL)] = { [C(OP_READ)] = { [C(RESULT_ACCESS)] = { 0x1c, CNTR_ODD, P }, - [C(RESULT_MISS)] = { 0x1d, CNTR_EVEN | CNTR_ODD, P }, + [C(RESULT_MISS)] = { 0x1d, CNTR_EVEN, P }, }, [C(OP_WRITE)] = { [C(RESULT_ACCESS)] = { 0x1c, CNTR_ODD, P }, - [C(RESULT_MISS)] = { 0x1d, CNTR_EVEN | CNTR_ODD, P }, + [C(RESULT_MISS)] = { 0x1d, CNTR_EVEN, P }, }, }, [C(ITLB)] = { diff --git a/arch/mips/mti-malta/malta-int.c b/arch/mips/mti-malta/malta-int.c index c69da3734699..5b28e81d94a0 100644 --- a/arch/mips/mti-malta/malta-int.c +++ b/arch/mips/mti-malta/malta-int.c @@ -473,7 +473,7 @@ static void __init fill_ipi_map(void) { int cpu; - for (cpu = 0; cpu < NR_CPUS; cpu++) { + for (cpu = 0; cpu < nr_cpu_ids; cpu++) { fill_ipi_map1(gic_resched_int_base, cpu, GIC_CPU_INT1); fill_ipi_map1(gic_call_int_base, cpu, GIC_CPU_INT2); } @@ -574,8 +574,9 @@ void __init arch_init_irq(void) /* FIXME */ int i; #if defined(CONFIG_MIPS_MT_SMP) - gic_call_int_base = GIC_NUM_INTRS - NR_CPUS; - gic_resched_int_base = gic_call_int_base - NR_CPUS; + gic_call_int_base = GIC_NUM_INTRS - + (NR_CPUS - nr_cpu_ids) * 2 - nr_cpu_ids; + gic_resched_int_base = gic_call_int_base - nr_cpu_ids; fill_ipi_map(); #endif gic_init(GIC_BASE_ADDR, GIC_ADDRSPACE_SZ, gic_intr_map, @@ -599,7 +600,7 @@ void __init arch_init_irq(void) printk("CPU%d: status register now %08x\n", smp_processor_id(), read_c0_status()); write_c0_status(0x1100dc00); printk("CPU%d: status register frc %08x\n", smp_processor_id(), read_c0_status()); - for (i = 0; i < NR_CPUS; i++) { + for (i = 0; i < nr_cpu_ids; i++) { arch_init_ipiirq(MIPS_GIC_IRQ_BASE + GIC_RESCHED_INT(i), &irq_resched); arch_init_ipiirq(MIPS_GIC_IRQ_BASE + diff --git a/arch/mips/ralink/timer.c b/arch/mips/ralink/timer.c index e49241a2c39a..202785709441 100644 --- a/arch/mips/ralink/timer.c +++ b/arch/mips/ralink/timer.c @@ -126,7 +126,7 @@ static int rt_timer_probe(struct platform_device *pdev) return -ENOENT; } - rt->membase = devm_request_and_ioremap(&pdev->dev, res); + rt->membase = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(rt->membase)) return PTR_ERR(rt->membase); diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f67e839f06c8..e22d7f5f9837 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -638,10 +638,10 @@ config PARAVIRT_SPINLOCKS spinlock implementation with something virtualization-friendly (for example, block the virtual CPU rather than spinning). - Unfortunately the downside is an up to 5% performance hit on - native kernels, with various workloads. + It has a minimal impact on native kernels and gives a nice performance + benefit on paravirtualized KVM / Xen kernels. - If you are unsure how to answer this question, answer N. + If you are unsure how to answer this question, answer Y. source "arch/x86/xen/Kconfig" diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 15e8563e5c24..df94598ad05a 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -402,17 +402,6 @@ void alternatives_enable_smp(void) { struct smp_alt_module *mod; -#ifdef CONFIG_LOCKDEP - /* - * Older binutils section handling bug prevented - * alternatives-replacement from working reliably. - * - * If this still occurs then you should see a hang - * or crash shortly after this line: - */ - pr_info("lockdep: fixing up alternatives\n"); -#endif - /* Why bother if there are no other CPUs? */ BUG_ON(num_possible_cpus() == 1); diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 9d8449158cf9..8a87a3224121 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1276,16 +1276,16 @@ void perf_events_lapic_init(void) static int __kprobes perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) { - int ret; u64 start_clock; u64 finish_clock; + int ret; if (!atomic_read(&active_events)) return NMI_DONE; - start_clock = local_clock(); + start_clock = sched_clock(); ret = x86_pmu.handle_irq(regs); - finish_clock = local_clock(); + finish_clock = sched_clock(); perf_sample_event_took(finish_clock - start_clock); diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index ba77ebc2c353..6fcb49ce50a1 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -113,10 +113,10 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2 u64 before, delta, whole_msecs; int remainder_ns, decimal_msecs, thishandled; - before = local_clock(); + before = sched_clock(); thishandled = a->handler(type, regs); handled += thishandled; - delta = local_clock() - before; + delta = sched_clock() - before; trace_nmi_handler(a->handler, (int)delta, thishandled); if (delta < nmi_longest_ns) diff --git a/drivers/clk/clk-nomadik.c b/drivers/clk/clk-nomadik.c index 51410c2ac2cb..4d978a3c88f7 100644 --- a/drivers/clk/clk-nomadik.c +++ b/drivers/clk/clk-nomadik.c @@ -27,6 +27,14 @@ */ #define SRC_CR 0x00U +#define SRC_CR_T0_ENSEL BIT(15) +#define SRC_CR_T1_ENSEL BIT(17) +#define SRC_CR_T2_ENSEL BIT(19) +#define SRC_CR_T3_ENSEL BIT(21) +#define SRC_CR_T4_ENSEL BIT(23) +#define SRC_CR_T5_ENSEL BIT(25) +#define SRC_CR_T6_ENSEL BIT(27) +#define SRC_CR_T7_ENSEL BIT(29) #define SRC_XTALCR 0x0CU #define SRC_XTALCR_XTALTIMEN BIT(20) #define SRC_XTALCR_SXTALDIS BIT(19) @@ -543,6 +551,19 @@ void __init nomadik_clk_init(void) __func__, np->name); return; } + + /* Set all timers to use the 2.4 MHz TIMCLK */ + val = readl(src_base + SRC_CR); + val |= SRC_CR_T0_ENSEL; + val |= SRC_CR_T1_ENSEL; + val |= SRC_CR_T2_ENSEL; + val |= SRC_CR_T3_ENSEL; + val |= SRC_CR_T4_ENSEL; + val |= SRC_CR_T5_ENSEL; + val |= SRC_CR_T6_ENSEL; + val |= SRC_CR_T7_ENSEL; + writel(val, src_base + SRC_CR); + val = readl(src_base + SRC_XTALCR); pr_info("SXTALO is %s\n", (val & SRC_XTALCR_SXTALDIS) ? "disabled" : "enabled"); diff --git a/drivers/clk/mvebu/armada-370.c b/drivers/clk/mvebu/armada-370.c index fc777bdc1886..81a202d12a7a 100644 --- a/drivers/clk/mvebu/armada-370.c +++ b/drivers/clk/mvebu/armada-370.c @@ -39,8 +39,8 @@ static const struct coreclk_ratio a370_coreclk_ratios[] __initconst = { }; static const u32 a370_tclk_freqs[] __initconst = { - 16600000, - 20000000, + 166000000, + 200000000, }; static u32 __init a370_get_tclk_freq(void __iomem *sar) diff --git a/drivers/clk/socfpga/clk.c b/drivers/clk/socfpga/clk.c index 5bb848cac6ec..81dd31a686df 100644 --- a/drivers/clk/socfpga/clk.c +++ b/drivers/clk/socfpga/clk.c @@ -49,7 +49,7 @@ #define SOCFPGA_L4_SP_CLK "l4_sp_clk" #define SOCFPGA_NAND_CLK "nand_clk" #define SOCFPGA_NAND_X_CLK "nand_x_clk" -#define SOCFPGA_MMC_CLK "mmc_clk" +#define SOCFPGA_MMC_CLK "sdmmc_clk" #define SOCFPGA_DB_CLK "gpio_db_clk" #define div_mask(width) ((1 << (width)) - 1) diff --git a/drivers/clk/versatile/clk-icst.c b/drivers/clk/versatile/clk-icst.c index 67ccf4aa7277..f5e4c21b301f 100644 --- a/drivers/clk/versatile/clk-icst.c +++ b/drivers/clk/versatile/clk-icst.c @@ -107,7 +107,7 @@ static int icst_set_rate(struct clk_hw *hw, unsigned long rate, vco = icst_hz_to_vco(icst->params, rate); icst->rate = icst_hz(icst->params, vco); - vco_set(icst->vcoreg, icst->lockreg, vco); + vco_set(icst->lockreg, icst->vcoreg, vco); return 0; } diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index db59bb9fbe23..10d1de5bce6f 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -107,8 +107,6 @@ static unsigned int intel_crt_get_flags(struct intel_encoder *encoder) static void intel_crt_get_config(struct intel_encoder *encoder, struct intel_crtc_config *pipe_config) { - struct drm_device *dev = encoder->base.dev; - pipe_config->adjusted_mode.flags |= intel_crt_get_flags(encoder); } diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index c45f9c0a1b34..b21d553c245b 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -904,6 +904,7 @@ static struct usb_device_id id_table_combined [] = { { USB_DEVICE(FTDI_VID, FTDI_LUMEL_PD12_PID) }, /* Crucible Devices */ { USB_DEVICE(FTDI_VID, FTDI_CT_COMET_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_Z3X_PID) }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 1b8af461b522..a7019d1e3058 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -1307,3 +1307,9 @@ * Manufacturer: Crucible Technologies */ #define FTDI_CT_COMET_PID 0x8e08 + +/* + * Product: Z3X Box + * Manufacturer: Smart GSM Team + */ +#define FTDI_Z3X_PID 0x0011 diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index bedf8e47713b..1e6de4cd079d 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -4,11 +4,6 @@ * Copyright (C) 2001-2007 Greg Kroah-Hartman (greg@kroah.com) * Copyright (C) 2003 IBM Corp. * - * Copyright (C) 2009, 2013 Frank Schäfer <fschaefer.oss@googlemail.com> - * - fixes, improvements and documentation for the baud rate encoding methods - * Copyright (C) 2013 Reinhard Max <max@suse.de> - * - fixes and improvements for the divisor based baud rate encoding method - * * Original driver for 2.2.x by anonymous * * This program is free software; you can redistribute it and/or @@ -134,18 +129,10 @@ MODULE_DEVICE_TABLE(usb, id_table); enum pl2303_type { - type_0, /* H version ? */ - type_1, /* H version ? */ - HX_TA, /* HX(A) / X(A) / TA version */ /* TODO: improve */ - HXD_EA_RA_SA, /* HXD / EA / RA / SA version */ /* TODO: improve */ - TB, /* TB version */ - HX_CLONE, /* Cheap and less functional clone of the HX chip */ + type_0, /* don't know the difference between type 0 and */ + type_1, /* type 1, until someone from prolific tells us... */ + HX, /* HX version of the pl2303 chip */ }; -/* - * NOTE: don't know the difference between type 0 and type 1, - * until someone from Prolific tells us... - * TODO: distinguish between X/HX, TA and HXD, EA, RA, SA variants - */ struct pl2303_serial_private { enum pl2303_type type; @@ -185,7 +172,6 @@ static int pl2303_startup(struct usb_serial *serial) { struct pl2303_serial_private *spriv; enum pl2303_type type = type_0; - char *type_str = "unknown (treating as type_0)"; unsigned char *buf; spriv = kzalloc(sizeof(*spriv), GFP_KERNEL); @@ -198,53 +184,15 @@ static int pl2303_startup(struct usb_serial *serial) return -ENOMEM; } - if (serial->dev->descriptor.bDeviceClass == 0x02) { + if (serial->dev->descriptor.bDeviceClass == 0x02) type = type_0; - type_str = "type_0"; - } else if (serial->dev->descriptor.bMaxPacketSize0 == 0x40) { - /* - * NOTE: The bcdDevice version is the only difference between - * the device descriptors of the X/HX, HXD, EA, RA, SA, TA, TB - */ - if (le16_to_cpu(serial->dev->descriptor.bcdDevice) == 0x300) { - /* Check if the device is a clone */ - pl2303_vendor_read(0x9494, 0, serial, buf); - /* - * NOTE: Not sure if this read is really needed. - * The HX returns 0x00, the clone 0x02, but the Windows - * driver seems to ignore the value and continues. - */ - pl2303_vendor_write(0x0606, 0xaa, serial); - pl2303_vendor_read(0x8686, 0, serial, buf); - if (buf[0] != 0xaa) { - type = HX_CLONE; - type_str = "X/HX clone (limited functionality)"; - } else { - type = HX_TA; - type_str = "X/HX/TA"; - } - pl2303_vendor_write(0x0606, 0x00, serial); - } else if (le16_to_cpu(serial->dev->descriptor.bcdDevice) - == 0x400) { - type = HXD_EA_RA_SA; - type_str = "HXD/EA/RA/SA"; - } else if (le16_to_cpu(serial->dev->descriptor.bcdDevice) - == 0x500) { - type = TB; - type_str = "TB"; - } else { - dev_info(&serial->interface->dev, - "unknown/unsupported device type\n"); - kfree(spriv); - kfree(buf); - return -ENODEV; - } - } else if (serial->dev->descriptor.bDeviceClass == 0x00 - || serial->dev->descriptor.bDeviceClass == 0xFF) { + else if (serial->dev->descriptor.bMaxPacketSize0 == 0x40) + type = HX; + else if (serial->dev->descriptor.bDeviceClass == 0x00) type = type_1; - type_str = "type_1"; - } - dev_dbg(&serial->interface->dev, "device type: %s\n", type_str); + else if (serial->dev->descriptor.bDeviceClass == 0xFF) + type = type_1; + dev_dbg(&serial->interface->dev, "device type: %d\n", type); spriv->type = type; usb_set_serial_data(serial, spriv); @@ -259,10 +207,10 @@ static int pl2303_startup(struct usb_serial *serial) pl2303_vendor_read(0x8383, 0, serial, buf); pl2303_vendor_write(0, 1, serial); pl2303_vendor_write(1, 0, serial); - if (type == type_0 || type == type_1) - pl2303_vendor_write(2, 0x24, serial); - else + if (type == HX) pl2303_vendor_write(2, 0x44, serial); + else + pl2303_vendor_write(2, 0x24, serial); kfree(buf); return 0; @@ -316,174 +264,65 @@ static int pl2303_set_control_lines(struct usb_serial_port *port, u8 value) return retval; } -static int pl2303_baudrate_encode_direct(int baud, enum pl2303_type type, - u8 buf[4]) +static void pl2303_encode_baudrate(struct tty_struct *tty, + struct usb_serial_port *port, + u8 buf[4]) { - /* - * NOTE: Only the values defined in baud_sup are supported ! - * => if unsupported values are set, the PL2303 uses 9600 baud instead - * => HX clones just don't work at unsupported baud rates < 115200 baud, - * for baud rates > 115200 they run at 115200 baud - */ const int baud_sup[] = { 75, 150, 300, 600, 1200, 1800, 2400, 3600, - 4800, 7200, 9600, 14400, 19200, 28800, 38400, - 57600, 115200, 230400, 460800, 614400, 921600, - 1228800, 2457600, 3000000, 6000000, 12000000 }; + 4800, 7200, 9600, 14400, 19200, 28800, 38400, + 57600, 115200, 230400, 460800, 500000, 614400, + 921600, 1228800, 2457600, 3000000, 6000000 }; + + struct usb_serial *serial = port->serial; + struct pl2303_serial_private *spriv = usb_get_serial_data(serial); + int baud; + int i; + /* - * NOTE: With the exception of type_0/1 devices, the following - * additional baud rates are supported (tested with HX rev. 3A only): - * 110*, 56000*, 128000, 134400, 161280, 201600, 256000*, 268800, - * 403200, 806400. (*: not HX and HX clones) - * - * Maximum values: HXD, TB: 12000000; HX, TA: 6000000; - * type_0+1: 1228800; RA: 921600; HX clones, SA: 115200 - * - * As long as we are not using this encoding method for anything else - * than the type_0+1, HX and HX clone chips, there is no point in - * complicating the code to support them. + * NOTE: Only the values defined in baud_sup are supported! + * => if unsupported values are set, the PL2303 seems to use + * 9600 baud (at least my PL2303X always does) */ - int i; + baud = tty_get_baud_rate(tty); + dev_dbg(&port->dev, "baud requested = %d\n", baud); + if (!baud) + return; /* Set baudrate to nearest supported value */ for (i = 0; i < ARRAY_SIZE(baud_sup); ++i) { if (baud_sup[i] > baud) break; } + if (i == ARRAY_SIZE(baud_sup)) baud = baud_sup[i - 1]; else if (i > 0 && (baud_sup[i] - baud) > (baud - baud_sup[i - 1])) baud = baud_sup[i - 1]; else baud = baud_sup[i]; - /* Respect the chip type specific baud rate limits */ - /* - * FIXME: as long as we don't know how to distinguish between the - * HXD, EA, RA, and SA chip variants, allow the max. value of 12M. - */ - if (type == HX_TA) - baud = min_t(int, baud, 6000000); - else if (type == type_0 || type == type_1) - baud = min_t(int, baud, 1228800); - else if (type == HX_CLONE) - baud = min_t(int, baud, 115200); - /* Direct (standard) baud rate encoding method */ - put_unaligned_le32(baud, buf); - - return baud; -} -static int pl2303_baudrate_encode_divisor(int baud, enum pl2303_type type, - u8 buf[4]) -{ - /* - * Divisor based baud rate encoding method - * - * NOTE: HX clones do NOT support this method. - * It's not clear if the type_0/1 chips support it. - * - * divisor = 12MHz * 32 / baudrate = 2^A * B - * - * with - * - * A = buf[1] & 0x0e - * B = buf[0] + (buf[1] & 0x01) << 8 - * - * Special cases: - * => 8 < B < 16: device seems to work not properly - * => B <= 8: device uses the max. value B = 512 instead - */ - unsigned int A, B; + /* type_0, type_1 only support up to 1228800 baud */ + if (spriv->type != HX) + baud = min_t(int, baud, 1228800); - /* - * NOTE: The Windows driver allows maximum baud rates of 110% of the - * specified maximium value. - * Quick tests with early (2004) HX (rev. A) chips suggest, that even - * higher baud rates (up to the maximum of 24M baud !) are working fine, - * but that should really be tested carefully in "real life" scenarios - * before removing the upper limit completely. - * Baud rates smaller than the specified 75 baud are definitely working - * fine. - */ - if (type == type_0 || type == type_1) - baud = min_t(int, baud, 1228800 * 1.1); - else if (type == HX_TA) - baud = min_t(int, baud, 6000000 * 1.1); - else if (type == HXD_EA_RA_SA) - /* HXD, EA: 12Mbps; RA: 1Mbps; SA: 115200 bps */ - /* - * FIXME: as long as we don't know how to distinguish between - * these chip variants, allow the max. of these values - */ - baud = min_t(int, baud, 12000000 * 1.1); - else if (type == TB) - baud = min_t(int, baud, 12000000 * 1.1); - /* Determine factors A and B */ - A = 0; - B = 12000000 * 32 / baud; /* 12MHz */ - B <<= 1; /* Add one bit for rounding */ - while (B > (512 << 1) && A <= 14) { - A += 2; - B >>= 2; - } - if (A > 14) { /* max. divisor = min. baudrate reached */ - A = 14; - B = 512; - /* => ~45.78 baud */ + if (baud <= 115200) { + put_unaligned_le32(baud, buf); } else { - B = (B + 1) >> 1; /* Round the last bit */ - } - /* Handle special cases */ - if (B == 512) - B = 0; /* also: 1 to 8 */ - else if (B < 16) /* - * NOTE: With the current algorithm this happens - * only for A=0 and means that the min. divisor - * (respectively: the max. baudrate) is reached. + * Apparently the formula for higher speeds is: + * baudrate = 12M * 32 / (2^buf[1]) / buf[0] */ - B = 16; /* => 24 MBaud */ - /* Encode the baud rate */ - buf[3] = 0x80; /* Select divisor encoding method */ - buf[2] = 0; - buf[1] = (A & 0x0e); /* A */ - buf[1] |= ((B & 0x100) >> 8); /* MSB of B */ - buf[0] = B & 0xff; /* 8 LSBs of B */ - /* Calculate the actual/resulting baud rate */ - if (B <= 8) - B = 512; - baud = 12000000 * 32 / ((1 << A) * B); - - return baud; -} - -static void pl2303_encode_baudrate(struct tty_struct *tty, - struct usb_serial_port *port, - enum pl2303_type type, - u8 buf[4]) -{ - int baud; + unsigned tmp = 12000000 * 32 / baud; + buf[3] = 0x80; + buf[2] = 0; + buf[1] = (tmp >= 256); + while (tmp >= 256) { + tmp >>= 2; + buf[1] <<= 1; + } + buf[0] = tmp; + } - baud = tty_get_baud_rate(tty); - dev_dbg(&port->dev, "baud requested = %d\n", baud); - if (!baud) - return; - /* - * There are two methods for setting/encoding the baud rate - * 1) Direct method: encodes the baud rate value directly - * => supported by all chip types - * 2) Divisor based method: encodes a divisor to a base value (12MHz*32) - * => not supported by HX clones (and likely type_0/1 chips) - * - * NOTE: Although the divisor based baud rate encoding method is much - * more flexible, some of the standard baud rate values can not be - * realized exactly. But the difference is very small (max. 0.2%) and - * the device likely uses the same baud rate generator for both methods - * so that there is likley no difference. - */ - if (type == type_0 || type == type_1 || type == HX_CLONE) - baud = pl2303_baudrate_encode_direct(baud, type, buf); - else - baud = pl2303_baudrate_encode_divisor(baud, type, buf); /* Save resulting baud rate */ tty_encode_baud_rate(tty, baud, baud); dev_dbg(&port->dev, "baud set = %d\n", baud); @@ -540,8 +379,8 @@ static void pl2303_set_termios(struct tty_struct *tty, dev_dbg(&port->dev, "data bits = %d\n", buf[6]); } - /* For reference: buf[0]:buf[3] baud rate value */ - pl2303_encode_baudrate(tty, port, spriv->type, buf); + /* For reference buf[0]:buf[3] baud rate value */ + pl2303_encode_baudrate(tty, port, &buf[0]); /* For reference buf[4]=0 is 1 stop bits */ /* For reference buf[4]=1 is 1.5 stop bits */ @@ -618,10 +457,10 @@ static void pl2303_set_termios(struct tty_struct *tty, dev_dbg(&port->dev, "0xa1:0x21:0:0 %d - %7ph\n", i, buf); if (C_CRTSCTS(tty)) { - if (spriv->type == type_0 || spriv->type == type_1) - pl2303_vendor_write(0x0, 0x41, serial); - else + if (spriv->type == HX) pl2303_vendor_write(0x0, 0x61, serial); + else + pl2303_vendor_write(0x0, 0x41, serial); } else { pl2303_vendor_write(0x0, 0x0, serial); } @@ -658,7 +497,7 @@ static int pl2303_open(struct tty_struct *tty, struct usb_serial_port *port) struct pl2303_serial_private *spriv = usb_get_serial_data(serial); int result; - if (spriv->type == type_0 || spriv->type == type_1) { + if (spriv->type != HX) { usb_clear_halt(serial->dev, port->write_urb->pipe); usb_clear_halt(serial->dev, port->read_urb->pipe); } else { @@ -833,7 +672,6 @@ static void pl2303_break_ctl(struct tty_struct *tty, int break_state) result = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), BREAK_REQUEST, BREAK_REQUEST_TYPE, state, 0, NULL, 0, 100); - /* NOTE: HX clones don't support sending breaks, -EPIPE is returned */ if (result) dev_err(&port->dev, "error sending break = %d\n", result); } diff --git a/fs/dcache.c b/fs/dcache.c index 20532cb0b06e..ae6ebb88ceff 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -542,7 +542,7 @@ EXPORT_SYMBOL(d_drop); * If ref is non-zero, then decrement the refcount too. * Returns dentry requiring refcount drop, or NULL if we're done. */ -static inline struct dentry * +static struct dentry * dentry_kill(struct dentry *dentry, int unlock_on_failure) __releases(dentry->d_lock) { @@ -630,7 +630,8 @@ repeat: goto kill_it; } - dentry->d_flags |= DCACHE_REFERENCED; + if (!(dentry->d_flags & DCACHE_REFERENCED)) + dentry->d_flags |= DCACHE_REFERENCED; dentry_lru_add(dentry); dentry->d_lockref.count--; diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 19c19a5eee29..f6c82de12541 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -34,9 +34,9 @@ struct ipc_namespace { int sem_ctls[4]; int used_sems; - int msg_ctlmax; - int msg_ctlmnb; - int msg_ctlmni; + unsigned int msg_ctlmax; + unsigned int msg_ctlmnb; + unsigned int msg_ctlmni; atomic_t msg_bytes; atomic_t msg_hdrs; int auto_msgmni; diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 4106721c4e5e..45a0a9e81478 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -19,6 +19,21 @@ */ /* + * INIT_LIST_HEAD_RCU - Initialize a list_head visible to RCU readers + * @list: list to be initialized + * + * You should instead use INIT_LIST_HEAD() for normal initialization and + * cleanup tasks, when readers have no access to the list being initialized. + * However, if the list being initialized is visible to readers, you + * need to keep the compiler from being too mischievous. + */ +static inline void INIT_LIST_HEAD_RCU(struct list_head *list) +{ + ACCESS_ONCE(list->next) = list; + ACCESS_ONCE(list->prev) = list; +} + +/* * return the ->next pointer of a list_head in an rcu safe * way, we must not access it directly */ @@ -191,9 +206,13 @@ static inline void list_splice_init_rcu(struct list_head *list, if (list_empty(list)) return; - /* "first" and "last" tracking list, so initialize it. */ + /* + * "first" and "last" tracking list, so initialize it. RCU readers + * have access to this list, so we must use INIT_LIST_HEAD_RCU() + * instead of INIT_LIST_HEAD(). + */ - INIT_LIST_HEAD(list); + INIT_LIST_HEAD_RCU(list); /* * At this point, the list body still points to the source list. diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index f1f1bc39346b..39cbb889e20d 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -261,6 +261,10 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev, rcu_irq_exit(); \ } while (0) +#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) +extern bool __rcu_is_watching(void); +#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) */ + /* * Infrastructure to implement the synchronize_() primitives in * TREE_RCU and rcu_barrier_() primitives in TINY_RCU. @@ -297,10 +301,6 @@ static inline void destroy_rcu_head_on_stack(struct rcu_head *head) } #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ -#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP) -extern int rcu_is_cpu_idle(void); -#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP) */ - #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) bool rcu_lockdep_current_cpu_online(void); #else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ @@ -351,7 +351,7 @@ static inline int rcu_read_lock_held(void) { if (!debug_lockdep_rcu_enabled()) return 1; - if (rcu_is_cpu_idle()) + if (!rcu_is_watching()) return 0; if (!rcu_lockdep_current_cpu_online()) return 0; @@ -402,7 +402,7 @@ static inline int rcu_read_lock_sched_held(void) if (!debug_lockdep_rcu_enabled()) return 1; - if (rcu_is_cpu_idle()) + if (!rcu_is_watching()) return 0; if (!rcu_lockdep_current_cpu_online()) return 0; @@ -771,7 +771,7 @@ static inline void rcu_read_lock(void) __rcu_read_lock(); __acquire(RCU); rcu_lock_acquire(&rcu_lock_map); - rcu_lockdep_assert(!rcu_is_cpu_idle(), + rcu_lockdep_assert(rcu_is_watching(), "rcu_read_lock() used illegally while idle"); } @@ -792,7 +792,7 @@ static inline void rcu_read_lock(void) */ static inline void rcu_read_unlock(void) { - rcu_lockdep_assert(!rcu_is_cpu_idle(), + rcu_lockdep_assert(rcu_is_watching(), "rcu_read_unlock() used illegally while idle"); rcu_lock_release(&rcu_lock_map); __release(RCU); @@ -821,7 +821,7 @@ static inline void rcu_read_lock_bh(void) local_bh_disable(); __acquire(RCU_BH); rcu_lock_acquire(&rcu_bh_lock_map); - rcu_lockdep_assert(!rcu_is_cpu_idle(), + rcu_lockdep_assert(rcu_is_watching(), "rcu_read_lock_bh() used illegally while idle"); } @@ -832,7 +832,7 @@ static inline void rcu_read_lock_bh(void) */ static inline void rcu_read_unlock_bh(void) { - rcu_lockdep_assert(!rcu_is_cpu_idle(), + rcu_lockdep_assert(rcu_is_watching(), "rcu_read_unlock_bh() used illegally while idle"); rcu_lock_release(&rcu_bh_lock_map); __release(RCU_BH); @@ -857,7 +857,7 @@ static inline void rcu_read_lock_sched(void) preempt_disable(); __acquire(RCU_SCHED); rcu_lock_acquire(&rcu_sched_lock_map); - rcu_lockdep_assert(!rcu_is_cpu_idle(), + rcu_lockdep_assert(rcu_is_watching(), "rcu_read_lock_sched() used illegally while idle"); } @@ -875,7 +875,7 @@ static inline notrace void rcu_read_lock_sched_notrace(void) */ static inline void rcu_read_unlock_sched(void) { - rcu_lockdep_assert(!rcu_is_cpu_idle(), + rcu_lockdep_assert(rcu_is_watching(), "rcu_read_unlock_sched() used illegally while idle"); rcu_lock_release(&rcu_sched_lock_map); __release(RCU_SCHED); diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index e31005ee339e..09ebcbe9fd78 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -132,4 +132,21 @@ static inline void rcu_scheduler_starting(void) } #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) + +static inline bool rcu_is_watching(void) +{ + return __rcu_is_watching(); +} + +#else /* defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */ + +static inline bool rcu_is_watching(void) +{ + return true; +} + + +#endif /* #else defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */ + #endif /* __LINUX_RCUTINY_H */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 226169d1bd2b..4b9c81548742 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -90,4 +90,6 @@ extern void exit_rcu(void); extern void rcu_scheduler_starting(void); extern int rcu_scheduler_active __read_mostly; +extern bool rcu_is_watching(void); + #endif /* __LINUX_RCUTREE_H */ diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 10d16c4fbe89..41467f8ff8ec 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -2,8 +2,8 @@ #define _SCHED_SYSCTL_H #ifdef CONFIG_DETECT_HUNG_TASK +extern int sysctl_hung_task_check_count; extern unsigned int sysctl_hung_task_panic; -extern unsigned long sysctl_hung_task_check_count; extern unsigned long sysctl_hung_task_timeout_secs; extern unsigned long sysctl_hung_task_warnings; extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index ee2376cfaab3..aca382266411 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -39,15 +39,26 @@ TRACE_EVENT(rcu_utilization, #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) /* - * Tracepoint for grace-period events: starting and ending a grace - * period ("start" and "end", respectively), a CPU noting the start - * of a new grace period or the end of an old grace period ("cpustart" - * and "cpuend", respectively), a CPU passing through a quiescent - * state ("cpuqs"), a CPU coming online or going offline ("cpuonl" - * and "cpuofl", respectively), a CPU being kicked for being too - * long in dyntick-idle mode ("kick"), a CPU accelerating its new - * callbacks to RCU_NEXT_READY_TAIL ("AccReadyCB"), and a CPU - * accelerating its new callbacks to RCU_WAIT_TAIL ("AccWaitCB"). + * Tracepoint for grace-period events. Takes a string identifying the + * RCU flavor, the grace-period number, and a string identifying the + * grace-period-related event as follows: + * + * "AccReadyCB": CPU acclerates new callbacks to RCU_NEXT_READY_TAIL. + * "AccWaitCB": CPU accelerates new callbacks to RCU_WAIT_TAIL. + * "newreq": Request a new grace period. + * "start": Start a grace period. + * "cpustart": CPU first notices a grace-period start. + * "cpuqs": CPU passes through a quiescent state. + * "cpuonl": CPU comes online. + * "cpuofl": CPU goes offline. + * "reqwait": GP kthread sleeps waiting for grace-period request. + * "reqwaitsig": GP kthread awakened by signal from reqwait state. + * "fqswait": GP kthread waiting until time to force quiescent states. + * "fqsstart": GP kthread starts forcing quiescent states. + * "fqsend": GP kthread done forcing quiescent states. + * "fqswaitsig": GP kthread awakened by signal from fqswait state. + * "end": End a grace period. + * "cpuend": CPU first notices a grace-period end. */ TRACE_EVENT(rcu_grace_period, @@ -161,6 +172,46 @@ TRACE_EVENT(rcu_grace_period_init, ); /* + * Tracepoint for RCU no-CBs CPU callback handoffs. This event is intended + * to assist debugging of these handoffs. + * + * The first argument is the name of the RCU flavor, and the second is + * the number of the offloaded CPU are extracted. The third and final + * argument is a string as follows: + * + * "WakeEmpty": Wake rcuo kthread, first CB to empty list. + * "WakeOvf": Wake rcuo kthread, CB list is huge. + * "WakeNot": Don't wake rcuo kthread. + * "WakeNotPoll": Don't wake rcuo kthread because it is polling. + * "Poll": Start of new polling cycle for rcu_nocb_poll. + * "Sleep": Sleep waiting for CBs for !rcu_nocb_poll. + * "WokeEmpty": rcuo kthread woke to find empty list. + * "WokeNonEmpty": rcuo kthread woke to find non-empty list. + * "WaitQueue": Enqueue partially done, timed wait for it to complete. + * "WokeQueue": Partial enqueue now complete. + */ +TRACE_EVENT(rcu_nocb_wake, + + TP_PROTO(const char *rcuname, int cpu, const char *reason), + + TP_ARGS(rcuname, cpu, reason), + + TP_STRUCT__entry( + __field(const char *, rcuname) + __field(int, cpu) + __field(const char *, reason) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->cpu = cpu; + __entry->reason = reason; + ), + + TP_printk("%s %d %s", __entry->rcuname, __entry->cpu, __entry->reason) +); + +/* * Tracepoint for tasks blocking within preemptible-RCU read-side * critical sections. Track the type of RCU (which one day might * include SRCU), the grace-period number that the task is blocking @@ -540,17 +591,17 @@ TRACE_EVENT(rcu_invoke_kfree_callback, TRACE_EVENT(rcu_batch_end, TP_PROTO(const char *rcuname, int callbacks_invoked, - bool cb, bool nr, bool iit, bool risk), + char cb, char nr, char iit, char risk), TP_ARGS(rcuname, callbacks_invoked, cb, nr, iit, risk), TP_STRUCT__entry( __field(const char *, rcuname) __field(int, callbacks_invoked) - __field(bool, cb) - __field(bool, nr) - __field(bool, iit) - __field(bool, risk) + __field(char, cb) + __field(char, nr) + __field(char, iit) + __field(char, risk) ), TP_fast_assign( @@ -656,6 +707,7 @@ TRACE_EVENT(rcu_barrier, #define trace_rcu_future_grace_period(rcuname, gpnum, completed, c, \ level, grplo, grphi, event) \ do { } while (0) +#define trace_rcu_nocb_wake(rcuname, cpu, reason) do { } while (0) #define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0) #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, \ diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 613381bcde40..04c308413a5d 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -424,6 +424,25 @@ TRACE_EVENT(sched_pi_setprio, __entry->oldprio, __entry->newprio) ); +#ifdef CONFIG_DETECT_HUNG_TASK +TRACE_EVENT(sched_process_hang, + TP_PROTO(struct task_struct *tsk), + TP_ARGS(tsk), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + ), + + TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid) +); +#endif /* CONFIG_DETECT_HUNG_TASK */ + #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 009a655a5d35..2fc1602e23bb 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -456,13 +456,15 @@ struct perf_event_mmap_page { /* * Control data for the mmap() data buffer. * - * User-space reading the @data_head value should issue an rmb(), on - * SMP capable platforms, after reading this value -- see - * perf_event_wakeup(). + * User-space reading the @data_head value should issue an smp_rmb(), + * after reading this value. * * When the mapping is PROT_WRITE the @data_tail value should be - * written by userspace to reflect the last read data. In this case - * the kernel will not over-write unread data. + * written by userspace to reflect the last read data, after issueing + * an smp_mb() to separate the data read from the ->data_tail store. + * In this case the kernel will not over-write unread data. + * + * See perf_output_put_handle() for the data ordering. */ __u64 data_head; /* head in the data section */ __u64 data_tail; /* user-space written tail */ diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index 130dfece27ac..b0e99deb6d05 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -62,7 +62,7 @@ static int proc_ipc_dointvec_minmax_orphans(ctl_table *table, int write, return err; } -static int proc_ipc_callback_dointvec(ctl_table *table, int write, +static int proc_ipc_callback_dointvec_minmax(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; @@ -72,7 +72,7 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write, memcpy(&ipc_table, table, sizeof(ipc_table)); ipc_table.data = get_ipc(table); - rc = proc_dointvec(&ipc_table, write, buffer, lenp, ppos); + rc = proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); if (write && !rc && lenp_bef == *lenp) /* @@ -152,15 +152,13 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, #define proc_ipc_dointvec NULL #define proc_ipc_dointvec_minmax NULL #define proc_ipc_dointvec_minmax_orphans NULL -#define proc_ipc_callback_dointvec NULL +#define proc_ipc_callback_dointvec_minmax NULL #define proc_ipcauto_dointvec_minmax NULL #endif static int zero; static int one = 1; -#ifdef CONFIG_CHECKPOINT_RESTORE static int int_max = INT_MAX; -#endif static struct ctl_table ipc_kern_table[] = { { @@ -198,21 +196,27 @@ static struct ctl_table ipc_kern_table[] = { .data = &init_ipc_ns.msg_ctlmax, .maxlen = sizeof (init_ipc_ns.msg_ctlmax), .mode = 0644, - .proc_handler = proc_ipc_dointvec, + .proc_handler = proc_ipc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &int_max, }, { .procname = "msgmni", .data = &init_ipc_ns.msg_ctlmni, .maxlen = sizeof (init_ipc_ns.msg_ctlmni), .mode = 0644, - .proc_handler = proc_ipc_callback_dointvec, + .proc_handler = proc_ipc_callback_dointvec_minmax, + .extra1 = &zero, + .extra2 = &int_max, }, { .procname = "msgmnb", .data = &init_ipc_ns.msg_ctlmnb, .maxlen = sizeof (init_ipc_ns.msg_ctlmnb), .mode = 0644, - .proc_handler = proc_ipc_dointvec, + .proc_handler = proc_ipc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &int_max, }, { .procname = "sem", diff --git a/kernel/Makefile b/kernel/Makefile index b3d51e229356..a4d1aa8da9bc 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -6,9 +6,9 @@ obj-y = fork.o exec_domain.o panic.o \ cpu.o exit.o itimer.o time.o softirq.o resource.o \ sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \ signal.o sys.o kmod.o workqueue.o pid.o task_work.o \ - rcupdate.o extable.o params.o posix-timers.o \ + extable.o params.o posix-timers.o \ kthread.o sys_ni.o posix-cpu-timers.o mutex.o \ - hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ + hrtimer.o rwsem.o nsproxy.o semaphore.o \ notifier.o ksysfs.o cred.o reboot.o \ async.o range.o groups.o lglock.o smpboot.o @@ -27,6 +27,7 @@ obj-y += power/ obj-y += printk/ obj-y += cpu/ obj-y += irq/ +obj-y += rcu/ obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o obj-$(CONFIG_FREEZER) += freezer.o @@ -81,12 +82,6 @@ obj-$(CONFIG_KGDB) += debug/ obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o obj-$(CONFIG_SECCOMP) += seccomp.o -obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o -obj-$(CONFIG_TREE_RCU) += rcutree.o -obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o -obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o -obj-$(CONFIG_TINY_RCU) += rcutiny.o -obj-$(CONFIG_TINY_PREEMPT_RCU) += rcutiny.o obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index cd55144270b5..9c2ddfbf4525 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -87,10 +87,31 @@ again: goto out; /* - * Publish the known good head. Rely on the full barrier implied - * by atomic_dec_and_test() order the rb->head read and this - * write. + * Since the mmap() consumer (userspace) can run on a different CPU: + * + * kernel user + * + * READ ->data_tail READ ->data_head + * smp_mb() (A) smp_rmb() (C) + * WRITE $data READ $data + * smp_wmb() (B) smp_mb() (D) + * STORE ->data_head WRITE ->data_tail + * + * Where A pairs with D, and B pairs with C. + * + * I don't think A needs to be a full barrier because we won't in fact + * write data until we see the store from userspace. So we simply don't + * issue the data WRITE until we observe it. Be conservative for now. + * + * OTOH, D needs to be a full barrier since it separates the data READ + * from the tail WRITE. + * + * For B a WMB is sufficient since it separates two WRITEs, and for C + * an RMB is sufficient since it separates two READs. + * + * See perf_output_begin(). */ + smp_wmb(); rb->user_page->data_head = head; /* @@ -154,9 +175,11 @@ int perf_output_begin(struct perf_output_handle *handle, * Userspace could choose to issue a mb() before updating the * tail pointer. So that all reads will be completed before the * write is issued. + * + * See perf_output_put_handle(). */ tail = ACCESS_ONCE(rb->user_page->data_tail); - smp_rmb(); + smp_mb(); offset = head = local_read(&rb->head); head += size; if (unlikely(!perf_output_space(rb, tail, offset, head))) diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 3e97fb126e6b..8807061ca004 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -16,11 +16,12 @@ #include <linux/export.h> #include <linux/sysctl.h> #include <linux/utsname.h> +#include <trace/events/sched.h> /* * The number of tasks checked: */ -unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT; +int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT; /* * Limit number of tasks checked in a batch. @@ -92,6 +93,9 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) t->last_switch_count = switch_count; return; } + + trace_sched_process_hang(t); + if (!sysctl_hung_task_warnings) return; sysctl_hung_task_warnings--; diff --git a/kernel/lockdep.c b/kernel/lockdep.c index e16c45b9ee77..4e8e14c34e42 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -4224,7 +4224,7 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s) printk("\n%srcu_scheduler_active = %d, debug_locks = %d\n", !rcu_lockdep_current_cpu_online() ? "RCU used illegally from offline CPU!\n" - : rcu_is_cpu_idle() + : !rcu_is_watching() ? "RCU used illegally from idle CPU!\n" : "", rcu_scheduler_active, debug_locks); @@ -4247,7 +4247,7 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s) * So complain bitterly if someone does call rcu_read_lock(), * rcu_read_lock_bh() and so on from extended quiescent states. */ - if (rcu_is_cpu_idle()) + if (!rcu_is_watching()) printk("RCU used illegally from extended quiescent state!\n"); lockdep_print_held_locks(curr); diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c index b2c71c5873e4..09220656d888 100644 --- a/kernel/lockdep_proc.c +++ b/kernel/lockdep_proc.c @@ -421,6 +421,7 @@ static void seq_lock_time(struct seq_file *m, struct lock_time *lt) seq_time(m, lt->min); seq_time(m, lt->max); seq_time(m, lt->total); + seq_time(m, lt->nr ? do_div(lt->total, lt->nr) : 0); } static void seq_stats(struct seq_file *m, struct lock_stat_data *data) @@ -518,20 +519,20 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data) } if (i) { seq_puts(m, "\n"); - seq_line(m, '.', 0, 40 + 1 + 10 * (14 + 1)); + seq_line(m, '.', 0, 40 + 1 + 12 * (14 + 1)); seq_puts(m, "\n"); } } static void seq_header(struct seq_file *m) { - seq_printf(m, "lock_stat version 0.3\n"); + seq_puts(m, "lock_stat version 0.4\n"); if (unlikely(!debug_locks)) seq_printf(m, "*WARNING* lock debugging disabled!! - possibly due to a lockdep warning\n"); - seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1)); - seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s " + seq_line(m, '-', 0, 40 + 1 + 12 * (14 + 1)); + seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s %14s %14s " "%14s %14s\n", "class name", "con-bounces", @@ -539,12 +540,14 @@ static void seq_header(struct seq_file *m) "waittime-min", "waittime-max", "waittime-total", + "waittime-avg", "acq-bounces", "acquisitions", "holdtime-min", "holdtime-max", - "holdtime-total"); - seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1)); + "holdtime-total", + "holdtime-avg"); + seq_line(m, '-', 0, 40 + 1 + 12 * (14 + 1)); seq_printf(m, "\n"); } diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile new file mode 100644 index 000000000000..01e9ec37a3e3 --- /dev/null +++ b/kernel/rcu/Makefile @@ -0,0 +1,6 @@ +obj-y += update.o srcu.o +obj-$(CONFIG_RCU_TORTURE_TEST) += torture.o +obj-$(CONFIG_TREE_RCU) += tree.o +obj-$(CONFIG_TREE_PREEMPT_RCU) += tree.o +obj-$(CONFIG_TREE_RCU_TRACE) += tree_trace.o +obj-$(CONFIG_TINY_RCU) += tiny.o diff --git a/kernel/rcu.h b/kernel/rcu/rcu.h index 77131966c4ad..7859a0a3951e 100644 --- a/kernel/rcu.h +++ b/kernel/rcu/rcu.h @@ -122,4 +122,11 @@ int rcu_jiffies_till_stall_check(void); #endif /* #ifdef CONFIG_RCU_STALL_COMMON */ +/* + * Strings used in tracepoints need to be exported via the + * tracing system such that tools like perf and trace-cmd can + * translate the string address pointers to actual text. + */ +#define TPS(x) tracepoint_string(x) + #endif /* __LINUX_RCU_H */ diff --git a/kernel/srcu.c b/kernel/rcu/srcu.c index 01d5ccb8bfe3..01d5ccb8bfe3 100644 --- a/kernel/srcu.c +++ b/kernel/rcu/srcu.c diff --git a/kernel/rcutiny.c b/kernel/rcu/tiny.c index 9ed6075dc562..0c9a934cfec1 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcu/tiny.c @@ -35,6 +35,7 @@ #include <linux/time.h> #include <linux/cpu.h> #include <linux/prefetch.h> +#include <linux/ftrace_event.h> #ifdef CONFIG_RCU_TRACE #include <trace/events/rcu.h> @@ -42,7 +43,7 @@ #include "rcu.h" -/* Forward declarations for rcutiny_plugin.h. */ +/* Forward declarations for tiny_plugin.h. */ struct rcu_ctrlblk; static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp); static void rcu_process_callbacks(struct softirq_action *unused); @@ -52,22 +53,23 @@ static void __call_rcu(struct rcu_head *head, static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; -#include "rcutiny_plugin.h" +#include "tiny_plugin.h" /* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */ static void rcu_idle_enter_common(long long newval) { if (newval) { - RCU_TRACE(trace_rcu_dyntick("--=", + RCU_TRACE(trace_rcu_dyntick(TPS("--="), rcu_dynticks_nesting, newval)); rcu_dynticks_nesting = newval; return; } - RCU_TRACE(trace_rcu_dyntick("Start", rcu_dynticks_nesting, newval)); + RCU_TRACE(trace_rcu_dyntick(TPS("Start"), + rcu_dynticks_nesting, newval)); if (!is_idle_task(current)) { - struct task_struct *idle = idle_task(smp_processor_id()); + struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); - RCU_TRACE(trace_rcu_dyntick("Error on entry: not idle task", + RCU_TRACE(trace_rcu_dyntick(TPS("Entry error: not idle task"), rcu_dynticks_nesting, newval)); ftrace_dump(DUMP_ALL); WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", @@ -120,15 +122,15 @@ EXPORT_SYMBOL_GPL(rcu_irq_exit); static void rcu_idle_exit_common(long long oldval) { if (oldval) { - RCU_TRACE(trace_rcu_dyntick("++=", + RCU_TRACE(trace_rcu_dyntick(TPS("++="), oldval, rcu_dynticks_nesting)); return; } - RCU_TRACE(trace_rcu_dyntick("End", oldval, rcu_dynticks_nesting)); + RCU_TRACE(trace_rcu_dyntick(TPS("End"), oldval, rcu_dynticks_nesting)); if (!is_idle_task(current)) { - struct task_struct *idle = idle_task(smp_processor_id()); + struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); - RCU_TRACE(trace_rcu_dyntick("Error on exit: not idle task", + RCU_TRACE(trace_rcu_dyntick(TPS("Exit error: not idle task"), oldval, rcu_dynticks_nesting)); ftrace_dump(DUMP_ALL); WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", @@ -174,18 +176,18 @@ void rcu_irq_enter(void) } EXPORT_SYMBOL_GPL(rcu_irq_enter); -#ifdef CONFIG_DEBUG_LOCK_ALLOC +#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) /* * Test whether RCU thinks that the current CPU is idle. */ -int rcu_is_cpu_idle(void) +bool __rcu_is_watching(void) { - return !rcu_dynticks_nesting; + return rcu_dynticks_nesting; } -EXPORT_SYMBOL(rcu_is_cpu_idle); +EXPORT_SYMBOL(__rcu_is_watching); -#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +#endif /* defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */ /* * Test whether the current CPU was interrupted from idle. Nested @@ -273,7 +275,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) if (&rcp->rcucblist == rcp->donetail) { RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, 0, -1)); RCU_TRACE(trace_rcu_batch_end(rcp->name, 0, - ACCESS_ONCE(rcp->rcucblist), + !!ACCESS_ONCE(rcp->rcucblist), need_resched(), is_idle_task(current), false)); @@ -304,7 +306,8 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) RCU_TRACE(cb_count++); } RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count)); - RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count, 0, need_resched(), + RCU_TRACE(trace_rcu_batch_end(rcp->name, + cb_count, 0, need_resched(), is_idle_task(current), false)); } diff --git a/kernel/rcutiny_plugin.h b/kernel/rcu/tiny_plugin.h index 280d06cae352..280d06cae352 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcu/tiny_plugin.h diff --git a/kernel/rcutorture.c b/kernel/rcu/torture.c index be63101c6175..3929cd451511 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcu/torture.c @@ -52,6 +52,12 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@freedesktop.org>"); +MODULE_ALIAS("rcutorture"); +#ifdef MODULE_PARAM_PREFIX +#undef MODULE_PARAM_PREFIX +#endif +#define MODULE_PARAM_PREFIX "rcutorture." + static int fqs_duration; module_param(fqs_duration, int, 0444); MODULE_PARM_DESC(fqs_duration, "Duration of fqs bursts (us), 0 to disable"); diff --git a/kernel/rcutree.c b/kernel/rcu/tree.c index 1dc9f3604ad8..4c06ddfea7cd 100644 --- a/kernel/rcutree.c +++ b/kernel/rcu/tree.c @@ -41,6 +41,7 @@ #include <linux/export.h> #include <linux/completion.h> #include <linux/moduleparam.h> +#include <linux/module.h> #include <linux/percpu.h> #include <linux/notifier.h> #include <linux/cpu.h> @@ -56,17 +57,16 @@ #include <linux/ftrace_event.h> #include <linux/suspend.h> -#include "rcutree.h" +#include "tree.h" #include <trace/events/rcu.h> #include "rcu.h" -/* - * Strings used in tracepoints need to be exported via the - * tracing system such that tools like perf and trace-cmd can - * translate the string address pointers to actual text. - */ -#define TPS(x) tracepoint_string(x) +MODULE_ALIAS("rcutree"); +#ifdef MODULE_PARAM_PREFIX +#undef MODULE_PARAM_PREFIX +#endif +#define MODULE_PARAM_PREFIX "rcutree." /* Data structures. */ @@ -222,7 +222,7 @@ void rcu_note_context_switch(int cpu) } EXPORT_SYMBOL_GPL(rcu_note_context_switch); -DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { +static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE, .dynticks = ATOMIC_INIT(1), #ifdef CONFIG_NO_HZ_FULL_SYSIDLE @@ -371,7 +371,8 @@ static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval, { trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting); if (!user && !is_idle_task(current)) { - struct task_struct *idle = idle_task(smp_processor_id()); + struct task_struct *idle __maybe_unused = + idle_task(smp_processor_id()); trace_rcu_dyntick(TPS("Error on entry: not idle task"), oldval, 0); ftrace_dump(DUMP_ORIG); @@ -407,7 +408,7 @@ static void rcu_eqs_enter(bool user) long long oldval; struct rcu_dynticks *rdtp; - rdtp = &__get_cpu_var(rcu_dynticks); + rdtp = this_cpu_ptr(&rcu_dynticks); oldval = rdtp->dynticks_nesting; WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0); if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) @@ -435,7 +436,7 @@ void rcu_idle_enter(void) local_irq_save(flags); rcu_eqs_enter(false); - rcu_sysidle_enter(&__get_cpu_var(rcu_dynticks), 0); + rcu_sysidle_enter(this_cpu_ptr(&rcu_dynticks), 0); local_irq_restore(flags); } EXPORT_SYMBOL_GPL(rcu_idle_enter); @@ -478,7 +479,7 @@ void rcu_irq_exit(void) struct rcu_dynticks *rdtp; local_irq_save(flags); - rdtp = &__get_cpu_var(rcu_dynticks); + rdtp = this_cpu_ptr(&rcu_dynticks); oldval = rdtp->dynticks_nesting; rdtp->dynticks_nesting--; WARN_ON_ONCE(rdtp->dynticks_nesting < 0); @@ -508,7 +509,8 @@ static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval, rcu_cleanup_after_idle(smp_processor_id()); trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting); if (!user && !is_idle_task(current)) { - struct task_struct *idle = idle_task(smp_processor_id()); + struct task_struct *idle __maybe_unused = + idle_task(smp_processor_id()); trace_rcu_dyntick(TPS("Error on exit: not idle task"), oldval, rdtp->dynticks_nesting); @@ -528,7 +530,7 @@ static void rcu_eqs_exit(bool user) struct rcu_dynticks *rdtp; long long oldval; - rdtp = &__get_cpu_var(rcu_dynticks); + rdtp = this_cpu_ptr(&rcu_dynticks); oldval = rdtp->dynticks_nesting; WARN_ON_ONCE(oldval < 0); if (oldval & DYNTICK_TASK_NEST_MASK) @@ -555,7 +557,7 @@ void rcu_idle_exit(void) local_irq_save(flags); rcu_eqs_exit(false); - rcu_sysidle_exit(&__get_cpu_var(rcu_dynticks), 0); + rcu_sysidle_exit(this_cpu_ptr(&rcu_dynticks), 0); local_irq_restore(flags); } EXPORT_SYMBOL_GPL(rcu_idle_exit); @@ -599,7 +601,7 @@ void rcu_irq_enter(void) long long oldval; local_irq_save(flags); - rdtp = &__get_cpu_var(rcu_dynticks); + rdtp = this_cpu_ptr(&rcu_dynticks); oldval = rdtp->dynticks_nesting; rdtp->dynticks_nesting++; WARN_ON_ONCE(rdtp->dynticks_nesting == 0); @@ -620,7 +622,7 @@ void rcu_irq_enter(void) */ void rcu_nmi_enter(void) { - struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); + struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); if (rdtp->dynticks_nmi_nesting == 0 && (atomic_read(&rdtp->dynticks) & 0x1)) @@ -642,7 +644,7 @@ void rcu_nmi_enter(void) */ void rcu_nmi_exit(void) { - struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); + struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); if (rdtp->dynticks_nmi_nesting == 0 || --rdtp->dynticks_nmi_nesting != 0) @@ -655,21 +657,34 @@ void rcu_nmi_exit(void) } /** - * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle + * __rcu_is_watching - are RCU read-side critical sections safe? + * + * Return true if RCU is watching the running CPU, which means that + * this CPU can safely enter RCU read-side critical sections. Unlike + * rcu_is_watching(), the caller of __rcu_is_watching() must have at + * least disabled preemption. + */ +bool __rcu_is_watching(void) +{ + return atomic_read(this_cpu_ptr(&rcu_dynticks.dynticks)) & 0x1; +} + +/** + * rcu_is_watching - see if RCU thinks that the current CPU is idle * * If the current CPU is in its idle loop and is neither in an interrupt * or NMI handler, return true. */ -int rcu_is_cpu_idle(void) +bool rcu_is_watching(void) { int ret; preempt_disable(); - ret = (atomic_read(&__get_cpu_var(rcu_dynticks).dynticks) & 0x1) == 0; + ret = __rcu_is_watching(); preempt_enable(); return ret; } -EXPORT_SYMBOL(rcu_is_cpu_idle); +EXPORT_SYMBOL_GPL(rcu_is_watching); #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) @@ -703,7 +718,7 @@ bool rcu_lockdep_current_cpu_online(void) if (in_nmi()) return 1; preempt_disable(); - rdp = &__get_cpu_var(rcu_sched_data); + rdp = this_cpu_ptr(&rcu_sched_data); rnp = rdp->mynode; ret = (rdp->grpmask & rnp->qsmaskinit) || !rcu_scheduler_fully_active; @@ -723,7 +738,7 @@ EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online); */ static int rcu_is_cpu_rrupt_from_idle(void) { - return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1; + return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 1; } /* @@ -802,8 +817,11 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, static void record_gp_stall_check_time(struct rcu_state *rsp) { - rsp->gp_start = jiffies; - rsp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check(); + unsigned long j = ACCESS_ONCE(jiffies); + + rsp->gp_start = j; + smp_wmb(); /* Record start time before stall time. */ + rsp->jiffies_stall = j + rcu_jiffies_till_stall_check(); } /* @@ -945,17 +963,48 @@ static void print_cpu_stall(struct rcu_state *rsp) static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) { + unsigned long completed; + unsigned long gpnum; + unsigned long gps; unsigned long j; unsigned long js; struct rcu_node *rnp; - if (rcu_cpu_stall_suppress) + if (rcu_cpu_stall_suppress || !rcu_gp_in_progress(rsp)) return; j = ACCESS_ONCE(jiffies); + + /* + * Lots of memory barriers to reject false positives. + * + * The idea is to pick up rsp->gpnum, then rsp->jiffies_stall, + * then rsp->gp_start, and finally rsp->completed. These values + * are updated in the opposite order with memory barriers (or + * equivalent) during grace-period initialization and cleanup. + * Now, a false positive can occur if we get an new value of + * rsp->gp_start and a old value of rsp->jiffies_stall. But given + * the memory barriers, the only way that this can happen is if one + * grace period ends and another starts between these two fetches. + * Detect this by comparing rsp->completed with the previous fetch + * from rsp->gpnum. + * + * Given this check, comparisons of jiffies, rsp->jiffies_stall, + * and rsp->gp_start suffice to forestall false positives. + */ + gpnum = ACCESS_ONCE(rsp->gpnum); + smp_rmb(); /* Pick up ->gpnum first... */ js = ACCESS_ONCE(rsp->jiffies_stall); + smp_rmb(); /* ...then ->jiffies_stall before the rest... */ + gps = ACCESS_ONCE(rsp->gp_start); + smp_rmb(); /* ...and finally ->gp_start before ->completed. */ + completed = ACCESS_ONCE(rsp->completed); + if (ULONG_CMP_GE(completed, gpnum) || + ULONG_CMP_LT(j, js) || + ULONG_CMP_GE(gps, js)) + return; /* No stall or GP completed since entering function. */ rnp = rdp->mynode; if (rcu_gp_in_progress(rsp) && - (ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) { + (ACCESS_ONCE(rnp->qsmask) & rdp->grpmask)) { /* We haven't checked in, so go dump stack. */ print_cpu_stall(rsp); @@ -1310,7 +1359,7 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp) } /* - * Initialize a new grace period. + * Initialize a new grace period. Return 0 if no grace period required. */ static int rcu_gp_init(struct rcu_state *rsp) { @@ -1319,18 +1368,27 @@ static int rcu_gp_init(struct rcu_state *rsp) rcu_bind_gp_kthread(); raw_spin_lock_irq(&rnp->lock); + if (rsp->gp_flags == 0) { + /* Spurious wakeup, tell caller to go back to sleep. */ + raw_spin_unlock_irq(&rnp->lock); + return 0; + } rsp->gp_flags = 0; /* Clear all flags: New grace period. */ - if (rcu_gp_in_progress(rsp)) { - /* Grace period already in progress, don't start another. */ + if (WARN_ON_ONCE(rcu_gp_in_progress(rsp))) { + /* + * Grace period already in progress, don't start another. + * Not supposed to be able to happen. + */ raw_spin_unlock_irq(&rnp->lock); return 0; } /* Advance to a new grace period and initialize state. */ + record_gp_stall_check_time(rsp); + smp_wmb(); /* Record GP times before starting GP. */ rsp->gpnum++; trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start")); - record_gp_stall_check_time(rsp); raw_spin_unlock_irq(&rnp->lock); /* Exclude any concurrent CPU-hotplug operations. */ @@ -1379,7 +1437,7 @@ static int rcu_gp_init(struct rcu_state *rsp) /* * Do one round of quiescent-state forcing. */ -int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in) +static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in) { int fqs_state = fqs_state_in; bool isidle = false; @@ -1464,8 +1522,12 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) rsp->fqs_state = RCU_GP_IDLE; rdp = this_cpu_ptr(rsp->rda); rcu_advance_cbs(rsp, rnp, rdp); /* Reduce false positives below. */ - if (cpu_needs_another_gp(rsp, rdp)) - rsp->gp_flags = 1; + if (cpu_needs_another_gp(rsp, rdp)) { + rsp->gp_flags = RCU_GP_FLAG_INIT; + trace_rcu_grace_period(rsp->name, + ACCESS_ONCE(rsp->gpnum), + TPS("newreq")); + } raw_spin_unlock_irq(&rnp->lock); } @@ -1475,6 +1537,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) static int __noreturn rcu_gp_kthread(void *arg) { int fqs_state; + int gf; unsigned long j; int ret; struct rcu_state *rsp = arg; @@ -1484,14 +1547,19 @@ static int __noreturn rcu_gp_kthread(void *arg) /* Handle grace-period start. */ for (;;) { + trace_rcu_grace_period(rsp->name, + ACCESS_ONCE(rsp->gpnum), + TPS("reqwait")); wait_event_interruptible(rsp->gp_wq, - rsp->gp_flags & + ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_INIT); - if ((rsp->gp_flags & RCU_GP_FLAG_INIT) && - rcu_gp_init(rsp)) + if (rcu_gp_init(rsp)) break; cond_resched(); flush_signals(current); + trace_rcu_grace_period(rsp->name, + ACCESS_ONCE(rsp->gpnum), + TPS("reqwaitsig")); } /* Handle quiescent-state forcing. */ @@ -1501,10 +1569,16 @@ static int __noreturn rcu_gp_kthread(void *arg) j = HZ; jiffies_till_first_fqs = HZ; } + ret = 0; for (;;) { - rsp->jiffies_force_qs = jiffies + j; + if (!ret) + rsp->jiffies_force_qs = jiffies + j; + trace_rcu_grace_period(rsp->name, + ACCESS_ONCE(rsp->gpnum), + TPS("fqswait")); ret = wait_event_interruptible_timeout(rsp->gp_wq, - (rsp->gp_flags & RCU_GP_FLAG_FQS) || + ((gf = ACCESS_ONCE(rsp->gp_flags)) & + RCU_GP_FLAG_FQS) || (!ACCESS_ONCE(rnp->qsmask) && !rcu_preempt_blocked_readers_cgp(rnp)), j); @@ -1513,13 +1587,23 @@ static int __noreturn rcu_gp_kthread(void *arg) !rcu_preempt_blocked_readers_cgp(rnp)) break; /* If time for quiescent-state forcing, do it. */ - if (ret == 0 || (rsp->gp_flags & RCU_GP_FLAG_FQS)) { + if (ULONG_CMP_GE(jiffies, rsp->jiffies_force_qs) || + (gf & RCU_GP_FLAG_FQS)) { + trace_rcu_grace_period(rsp->name, + ACCESS_ONCE(rsp->gpnum), + TPS("fqsstart")); fqs_state = rcu_gp_fqs(rsp, fqs_state); + trace_rcu_grace_period(rsp->name, + ACCESS_ONCE(rsp->gpnum), + TPS("fqsend")); cond_resched(); } else { /* Deal with stray signal. */ cond_resched(); flush_signals(current); + trace_rcu_grace_period(rsp->name, + ACCESS_ONCE(rsp->gpnum), + TPS("fqswaitsig")); } j = jiffies_till_next_fqs; if (j > HZ) { @@ -1567,6 +1651,8 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, return; } rsp->gp_flags = RCU_GP_FLAG_INIT; + trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), + TPS("newreq")); /* * We can't do wakeups while holding the rnp->lock, as that @@ -2268,7 +2354,7 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp, * If called from an extended quiescent state, invoke the RCU * core in order to force a re-evaluation of RCU's idleness. */ - if (rcu_is_cpu_idle() && cpu_online(smp_processor_id())) + if (!rcu_is_watching() && cpu_online(smp_processor_id())) invoke_rcu_core(); /* If interrupts were disabled or CPU offline, don't invoke RCU core. */ @@ -2738,10 +2824,13 @@ static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy) for_each_rcu_flavor(rsp) { rdp = per_cpu_ptr(rsp->rda, cpu); - if (rdp->qlen != rdp->qlen_lazy) + if (!rdp->nxtlist) + continue; + hc = true; + if (rdp->qlen != rdp->qlen_lazy || !all_lazy) { al = false; - if (rdp->nxtlist) - hc = true; + break; + } } if (all_lazy) *all_lazy = al; @@ -3229,7 +3318,7 @@ static void __init rcu_init_one(struct rcu_state *rsp, /* * Compute the rcu_node tree geometry from kernel parameters. This cannot - * replace the definitions in rcutree.h because those are needed to size + * replace the definitions in tree.h because those are needed to size * the ->node array in the rcu_state structure. */ static void __init rcu_init_geometry(void) @@ -3308,8 +3397,8 @@ void __init rcu_init(void) rcu_bootup_announce(); rcu_init_geometry(); - rcu_init_one(&rcu_sched_state, &rcu_sched_data); rcu_init_one(&rcu_bh_state, &rcu_bh_data); + rcu_init_one(&rcu_sched_state, &rcu_sched_data); __rcu_init_preempt(); open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); @@ -3324,4 +3413,4 @@ void __init rcu_init(void) rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu); } -#include "rcutree_plugin.h" +#include "tree_plugin.h" diff --git a/kernel/rcutree.h b/kernel/rcu/tree.h index 5f97eab602cd..52be957c9fe2 100644 --- a/kernel/rcutree.h +++ b/kernel/rcu/tree.h @@ -104,6 +104,8 @@ struct rcu_dynticks { /* idle-period nonlazy_posted snapshot. */ unsigned long last_accelerate; /* Last jiffy CBs were accelerated. */ + unsigned long last_advance_all; + /* Last jiffy CBs were all advanced. */ int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */ #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ }; diff --git a/kernel/rcutree_plugin.h b/kernel/rcu/tree_plugin.h index 130c97b027f2..3822ac0c4b27 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -28,7 +28,7 @@ #include <linux/gfp.h> #include <linux/oom.h> #include <linux/smpboot.h> -#include "time/tick-internal.h" +#include "../time/tick-internal.h" #define RCU_KTHREAD_PRIO 1 @@ -96,10 +96,15 @@ static void __init rcu_bootup_announce_oddness(void) #endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */ #ifdef CONFIG_RCU_NOCB_CPU_ALL pr_info("\tOffload RCU callbacks from all CPUs\n"); - cpumask_setall(rcu_nocb_mask); + cpumask_copy(rcu_nocb_mask, cpu_possible_mask); #endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */ #endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */ if (have_rcu_nocb_mask) { + if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) { + pr_info("\tNote: kernel parameter 'rcu_nocbs=' contains nonexistent CPUs.\n"); + cpumask_and(rcu_nocb_mask, cpu_possible_mask, + rcu_nocb_mask); + } cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask); pr_info("\tOffload RCU callbacks from CPUs: %s.\n", nocb_buf); if (rcu_nocb_poll) @@ -660,7 +665,7 @@ static void rcu_preempt_check_callbacks(int cpu) static void rcu_preempt_do_callbacks(void) { - rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data)); + rcu_do_batch(&rcu_preempt_state, this_cpu_ptr(&rcu_preempt_data)); } #endif /* #ifdef CONFIG_RCU_BOOST */ @@ -1128,7 +1133,7 @@ void exit_rcu(void) #ifdef CONFIG_RCU_BOOST -#include "rtmutex_common.h" +#include "../rtmutex_common.h" #ifdef CONFIG_RCU_TRACE @@ -1332,7 +1337,7 @@ static void invoke_rcu_callbacks_kthread(void) */ static bool rcu_is_callbacks_kthread(void) { - return __get_cpu_var(rcu_cpu_kthread_task) == current; + return __this_cpu_read(rcu_cpu_kthread_task) == current; } #define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000) @@ -1382,8 +1387,8 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, static void rcu_kthread_do_work(void) { - rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); - rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); + rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data)); + rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data)); rcu_preempt_do_callbacks(); } @@ -1402,7 +1407,7 @@ static void rcu_cpu_kthread_park(unsigned int cpu) static int rcu_cpu_kthread_should_run(unsigned int cpu) { - return __get_cpu_var(rcu_cpu_has_work); + return __this_cpu_read(rcu_cpu_has_work); } /* @@ -1412,8 +1417,8 @@ static int rcu_cpu_kthread_should_run(unsigned int cpu) */ static void rcu_cpu_kthread(unsigned int cpu) { - unsigned int *statusp = &__get_cpu_var(rcu_cpu_kthread_status); - char work, *workp = &__get_cpu_var(rcu_cpu_has_work); + unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status); + char work, *workp = this_cpu_ptr(&rcu_cpu_has_work); int spincnt; for (spincnt = 0; spincnt < 10; spincnt++) { @@ -1630,17 +1635,23 @@ module_param(rcu_idle_lazy_gp_delay, int, 0644); extern int tick_nohz_enabled; /* - * Try to advance callbacks for all flavors of RCU on the current CPU. - * Afterwards, if there are any callbacks ready for immediate invocation, - * return true. + * Try to advance callbacks for all flavors of RCU on the current CPU, but + * only if it has been awhile since the last time we did so. Afterwards, + * if there are any callbacks ready for immediate invocation, return true. */ static bool rcu_try_advance_all_cbs(void) { bool cbs_ready = false; struct rcu_data *rdp; + struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); struct rcu_node *rnp; struct rcu_state *rsp; + /* Exit early if we advanced recently. */ + if (jiffies == rdtp->last_advance_all) + return 0; + rdtp->last_advance_all = jiffies; + for_each_rcu_flavor(rsp) { rdp = this_cpu_ptr(rsp->rda); rnp = rdp->mynode; @@ -1739,6 +1750,8 @@ static void rcu_prepare_for_idle(int cpu) */ if (rdtp->all_lazy && rdtp->nonlazy_posted != rdtp->nonlazy_posted_snap) { + rdtp->all_lazy = false; + rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; invoke_rcu_core(); return; } @@ -1768,17 +1781,11 @@ static void rcu_prepare_for_idle(int cpu) */ static void rcu_cleanup_after_idle(int cpu) { - struct rcu_data *rdp; - struct rcu_state *rsp; if (rcu_is_nocb_cpu(cpu)) return; - rcu_try_advance_all_cbs(); - for_each_rcu_flavor(rsp) { - rdp = per_cpu_ptr(rsp->rda, cpu); - if (cpu_has_callbacks_ready_to_invoke(rdp)) - invoke_rcu_core(); - } + if (rcu_try_advance_all_cbs()) + invoke_rcu_core(); } /* @@ -2108,15 +2115,22 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp, /* If we are not being polled and there is a kthread, awaken it ... */ t = ACCESS_ONCE(rdp->nocb_kthread); - if (rcu_nocb_poll | !t) + if (rcu_nocb_poll || !t) { + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, + TPS("WakeNotPoll")); return; + } len = atomic_long_read(&rdp->nocb_q_count); if (old_rhpp == &rdp->nocb_head) { wake_up(&rdp->nocb_wq); /* ... only if queue was empty ... */ rdp->qlen_last_fqs_check = 0; + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeEmpty")); } else if (len > rdp->qlen_last_fqs_check + qhimark) { wake_up_process(t); /* ... or if many callbacks queued. */ rdp->qlen_last_fqs_check = LONG_MAX / 2; + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeOvf")); + } else { + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeNot")); } return; } @@ -2140,10 +2154,12 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, if (__is_kfree_rcu_offset((unsigned long)rhp->func)) trace_rcu_kfree_callback(rdp->rsp->name, rhp, (unsigned long)rhp->func, - rdp->qlen_lazy, rdp->qlen); + -atomic_long_read(&rdp->nocb_q_count_lazy), + -atomic_long_read(&rdp->nocb_q_count)); else trace_rcu_callback(rdp->rsp->name, rhp, - rdp->qlen_lazy, rdp->qlen); + -atomic_long_read(&rdp->nocb_q_count_lazy), + -atomic_long_read(&rdp->nocb_q_count)); return 1; } @@ -2221,6 +2237,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) static int rcu_nocb_kthread(void *arg) { int c, cl; + bool firsttime = 1; struct rcu_head *list; struct rcu_head *next; struct rcu_head **tail; @@ -2229,14 +2246,27 @@ static int rcu_nocb_kthread(void *arg) /* Each pass through this loop invokes one batch of callbacks */ for (;;) { /* If not polling, wait for next batch of callbacks. */ - if (!rcu_nocb_poll) + if (!rcu_nocb_poll) { + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, + TPS("Sleep")); wait_event_interruptible(rdp->nocb_wq, rdp->nocb_head); + } else if (firsttime) { + firsttime = 0; + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, + TPS("Poll")); + } list = ACCESS_ONCE(rdp->nocb_head); if (!list) { + if (!rcu_nocb_poll) + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, + TPS("WokeEmpty")); schedule_timeout_interruptible(1); flush_signals(current); continue; } + firsttime = 1; + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, + TPS("WokeNonEmpty")); /* * Extract queued callbacks, update counts, and wait @@ -2257,7 +2287,11 @@ static int rcu_nocb_kthread(void *arg) next = list->next; /* Wait for enqueuing to complete, if needed. */ while (next == NULL && &list->next != tail) { + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, + TPS("WaitQueue")); schedule_timeout_interruptible(1); + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, + TPS("WokeQueue")); next = list->next; } debug_rcu_head_unqueue(list); diff --git a/kernel/rcutree_trace.c b/kernel/rcu/tree_trace.c index cf6c17412932..3596797b7e46 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcu/tree_trace.c @@ -44,7 +44,7 @@ #include <linux/seq_file.h> #define RCU_TREE_NONCORE -#include "rcutree.h" +#include "tree.h" static int r_open(struct inode *inode, struct file *file, const struct seq_operations *op) diff --git a/kernel/rcupdate.c b/kernel/rcu/update.c index b02a339836b4..6cb3dff89e2b 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcu/update.c @@ -53,6 +53,12 @@ #include "rcu.h" +MODULE_ALIAS("rcupdate"); +#ifdef MODULE_PARAM_PREFIX +#undef MODULE_PARAM_PREFIX +#endif +#define MODULE_PARAM_PREFIX "rcupdate." + module_param(rcu_expedited, int, 0); #ifdef CONFIG_PREEMPT_RCU @@ -148,7 +154,7 @@ int rcu_read_lock_bh_held(void) { if (!debug_lockdep_rcu_enabled()) return 1; - if (rcu_is_cpu_idle()) + if (!rcu_is_watching()) return 0; if (!rcu_lockdep_current_cpu_online()) return 0; @@ -298,7 +304,7 @@ EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read); #endif int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */ -int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT; +static int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT; module_param(rcu_cpu_stall_suppress, int, 0644); module_param(rcu_cpu_stall_timeout, int, 0644); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index a159e1fd2013..339c003314f4 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -969,9 +969,10 @@ static struct ctl_table kern_table[] = { { .procname = "hung_task_check_count", .data = &sysctl_hung_task_check_count, - .maxlen = sizeof(unsigned long), + .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_doulongvec_minmax, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, }, { .procname = "hung_task_timeout_secs", diff --git a/lib/scatterlist.c b/lib/scatterlist.c index a685c8a79578..d16fa295ae1d 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -577,7 +577,8 @@ void sg_miter_stop(struct sg_mapping_iter *miter) miter->__offset += miter->consumed; miter->__remaining -= miter->consumed; - if (miter->__flags & SG_MITER_TO_SG) + if ((miter->__flags & SG_MITER_TO_SG) && + !PageSlab(miter->page)) flush_kernel_dcache_page(miter->page); if (miter->__flags & SG_MITER_ATOMIC) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 497ec33ff22d..13b9d0f221b8 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -54,6 +54,7 @@ #include <linux/page_cgroup.h> #include <linux/cpu.h> #include <linux/oom.h> +#include <linux/lockdep.h> #include "internal.h" #include <net/sock.h> #include <net/ip.h> @@ -2046,6 +2047,12 @@ static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg, return total; } +#ifdef CONFIG_LOCKDEP +static struct lockdep_map memcg_oom_lock_dep_map = { + .name = "memcg_oom_lock", +}; +#endif + static DEFINE_SPINLOCK(memcg_oom_lock); /* @@ -2083,7 +2090,8 @@ static bool mem_cgroup_oom_trylock(struct mem_cgroup *memcg) } iter->oom_lock = false; } - } + } else + mutex_acquire(&memcg_oom_lock_dep_map, 0, 1, _RET_IP_); spin_unlock(&memcg_oom_lock); @@ -2095,6 +2103,7 @@ static void mem_cgroup_oom_unlock(struct mem_cgroup *memcg) struct mem_cgroup *iter; spin_lock(&memcg_oom_lock); + mutex_release(&memcg_oom_lock_dep_map, 1, _RET_IP_); for_each_mem_cgroup_tree(iter, memcg) iter->oom_lock = false; spin_unlock(&memcg_oom_lock); @@ -2765,10 +2774,10 @@ done: *ptr = memcg; return 0; nomem: - *ptr = NULL; - if (gfp_mask & __GFP_NOFAIL) - return 0; - return -ENOMEM; + if (!(gfp_mask & __GFP_NOFAIL)) { + *ptr = NULL; + return -ENOMEM; + } bypass: *ptr = root_mem_cgroup; return -EINTR; @@ -3773,7 +3782,6 @@ void mem_cgroup_move_account_page_stat(struct mem_cgroup *from, { /* Update stat data for mem_cgroup */ preempt_disable(); - WARN_ON_ONCE(from->stat->count[idx] < nr_pages); __this_cpu_sub(from->stat->count[idx], nr_pages); __this_cpu_add(to->stat->count[idx], nr_pages); preempt_enable(); @@ -4950,31 +4958,18 @@ static void mem_cgroup_reparent_charges(struct mem_cgroup *memcg) } while (usage > 0); } -/* - * This mainly exists for tests during the setting of set of use_hierarchy. - * Since this is the very setting we are changing, the current hierarchy value - * is meaningless - */ -static inline bool __memcg_has_children(struct mem_cgroup *memcg) -{ - struct cgroup_subsys_state *pos; - - /* bounce at first found */ - css_for_each_child(pos, &memcg->css) - return true; - return false; -} - -/* - * Must be called with memcg_create_mutex held, unless the cgroup is guaranteed - * to be already dead (as in mem_cgroup_force_empty, for instance). This is - * from mem_cgroup_count_children(), in the sense that we don't really care how - * many children we have; we only need to know if we have any. It also counts - * any memcg without hierarchy as infertile. - */ static inline bool memcg_has_children(struct mem_cgroup *memcg) { - return memcg->use_hierarchy && __memcg_has_children(memcg); + lockdep_assert_held(&memcg_create_mutex); + /* + * The lock does not prevent addition or deletion to the list + * of children, but it prevents a new child from being + * initialized based on this parent in css_online(), so it's + * enough to decide whether hierarchically inherited + * attributes can still be changed or not. + */ + return memcg->use_hierarchy && + !list_empty(&memcg->css.cgroup->children); } /* @@ -5054,7 +5049,7 @@ static int mem_cgroup_hierarchy_write(struct cgroup_subsys_state *css, */ if ((!parent_memcg || !parent_memcg->use_hierarchy) && (val == 1 || val == 0)) { - if (!__memcg_has_children(memcg)) + if (list_empty(&memcg->css.cgroup->children)) memcg->use_hierarchy = val; else retval = -EBUSY; diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 487ac6f37ca2..9a11f9f799f4 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -55,6 +55,7 @@ static struct sym_entry *table; static unsigned int table_size, table_cnt; static int all_symbols = 0; static char symbol_prefix_char = '\0'; +static unsigned long long kernel_start_addr = 0; int token_profit[0x10000]; @@ -65,7 +66,10 @@ unsigned char best_table_len[256]; static void usage(void) { - fprintf(stderr, "Usage: kallsyms [--all-symbols] [--symbol-prefix=<prefix char>] < in.map > out.S\n"); + fprintf(stderr, "Usage: kallsyms [--all-symbols] " + "[--symbol-prefix=<prefix char>] " + "[--page-offset=<CONFIG_PAGE_OFFSET>] " + "< in.map > out.S\n"); exit(1); } @@ -194,6 +198,9 @@ static int symbol_valid(struct sym_entry *s) int i; int offset = 1; + if (s->addr < kernel_start_addr) + return 0; + /* skip prefix char */ if (symbol_prefix_char && *(s->sym + 1) == symbol_prefix_char) offset++; @@ -646,6 +653,9 @@ int main(int argc, char **argv) if ((*p == '"' && *(p+2) == '"') || (*p == '\'' && *(p+2) == '\'')) p++; symbol_prefix_char = *p; + } else if (strncmp(argv[i], "--page-offset=", 14) == 0) { + const char *p = &argv[i][14]; + kernel_start_addr = strtoull(p, NULL, 16); } else usage(); } diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 014994936b1c..32b10f53d0b4 100644 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -82,6 +82,8 @@ kallsyms() kallsymopt="${kallsymopt} --all-symbols" fi + kallsymopt="${kallsymopt} --page-offset=$CONFIG_PAGE_OFFSET" + local aflags="${KBUILD_AFLAGS} ${KBUILD_AFLAGS_KERNEL} \ ${NOSTDINC_FLAGS} ${LINUXINCLUDE} ${KBUILD_CPPFLAGS}" diff --git a/sound/core/pcm.c b/sound/core/pcm.c index 17f45e8aa89c..e1e9e0c999fe 100644 --- a/sound/core/pcm.c +++ b/sound/core/pcm.c @@ -49,6 +49,8 @@ static struct snd_pcm *snd_pcm_get(struct snd_card *card, int device) struct snd_pcm *pcm; list_for_each_entry(pcm, &snd_pcm_devices, list) { + if (pcm->internal) + continue; if (pcm->card == card && pcm->device == device) return pcm; } @@ -60,6 +62,8 @@ static int snd_pcm_next(struct snd_card *card, int device) struct snd_pcm *pcm; list_for_each_entry(pcm, &snd_pcm_devices, list) { + if (pcm->internal) + continue; if (pcm->card == card && pcm->device > device) return pcm->device; else if (pcm->card->number > card->number) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index bf313bea7085..8ad554312b69 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4623,6 +4623,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x05db, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800), SND_PCI_QUIRK(0x1043, 0x1477, "ASUS N56VZ", ALC662_FIXUP_ASUS_MODE4), + SND_PCI_QUIRK(0x1043, 0x1bf3, "ASUS N76VZ", ALC662_FIXUP_ASUS_MODE4), SND_PCI_QUIRK(0x1043, 0x8469, "ASUS mobo", ALC662_FIXUP_NO_JACK_DETECT), SND_PCI_QUIRK(0x105b, 0x0cd6, "Foxconn", ALC662_FIXUP_ASUS_MODE2), SND_PCI_QUIRK(0x144d, 0xc051, "Samsung R720", ALC662_FIXUP_IDEAPAD), diff --git a/sound/soc/codecs/wm_hubs.c b/sound/soc/codecs/wm_hubs.c index 8b50e5958de5..01daf655e20b 100644 --- a/sound/soc/codecs/wm_hubs.c +++ b/sound/soc/codecs/wm_hubs.c @@ -530,6 +530,7 @@ static int hp_supply_event(struct snd_soc_dapm_widget *w, hubs->hp_startup_mode); break; } + break; case SND_SOC_DAPM_PRE_PMD: snd_soc_update_bits(codec, WM8993_CHARGE_PUMP_1, diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index c17c14c394df..b2949aed1ac2 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -1949,7 +1949,7 @@ static ssize_t dapm_widget_power_read_file(struct file *file, w->active ? "active" : "inactive"); list_for_each_entry(p, &w->sources, list_sink) { - if (p->connected && !p->connected(w, p->sink)) + if (p->connected && !p->connected(w, p->source)) continue; if (p->connect) @@ -3495,6 +3495,7 @@ int snd_soc_dapm_new_dai_widgets(struct snd_soc_dapm_context *dapm, if (!w) { dev_err(dapm->dev, "ASoC: Failed to create %s widget\n", dai->driver->playback.stream_name); + return -ENOMEM; } w->priv = dai; @@ -3513,6 +3514,7 @@ int snd_soc_dapm_new_dai_widgets(struct snd_soc_dapm_context *dapm, if (!w) { dev_err(dapm->dev, "ASoC: Failed to create %s widget\n", dai->driver->capture.stream_name); + return -ENOMEM; } w->priv = dai; |