72 files changed, 2989 insertions, 690 deletions
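Two mechanical conversions account for most of the hunks below: the RCU flavor consolidation, under which synchronize_sched(), synchronize_rcu_bh(), call_rcu_sched() and rcu_barrier_sched() all become synchronize_rcu(), call_rcu() and rcu_barrier(), and the replacement of open-coded spin_is_locked() assertions with lockdep_assert_held(). A minimal caller-side sketch of both conversions follows; it is illustrative only and not part of this series, and the rmv_example structure, its fields and the example_lock spinlock are hypothetical names.

#include <linux/kernel.h>
#include <linux/lockdep.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct rmv_example {			/* hypothetical structure, for illustration only */
	struct rcu_head rcu;
	int payload;
};

static DEFINE_SPINLOCK(example_lock);	/* hypothetical lock protecting removal */

static void rmv_example_free(struct rcu_head *head)
{
	kfree(container_of(head, struct rmv_example, rcu));
}

/* Caller must hold example_lock. */
static void rmv_example_retire(struct rmv_example *p)
{
	/*
	 * Older code asserted the lock with WARN_ON_SMP(!spin_is_locked(...)),
	 * which only proves that somebody holds it.  lockdep_assert_held()
	 * checks that the current context holds it.
	 */
	lockdep_assert_held(&example_lock);

	/*
	 * Before the flavor consolidation this would have been
	 * call_rcu_sched(&p->rcu, rmv_example_free).  With the flavors
	 * merged, plain call_rcu() also waits for preempt-disabled,
	 * bh-disabled and irq-disabled readers.
	 */
	call_rcu(&p->rcu, rmv_example_free);
}

static void rmv_example_quiesce(void)
{
	/* synchronize_sched() and synchronize_rcu_bh() both collapse into this. */
	synchronize_rcu();
}

After the consolidation a single grace period covers preempt-disabled, bottom-half and ordinary RCU readers, which is why the per-flavor calls can simply be dropped rather than translated one for one.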
diff --git a/MAINTAINERS b/MAINTAINERS index f4855974f325..188809580d5e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4033,7 +4033,7 @@ S: Supported F: drivers/net/ethernet/chelsio/cxgb4vf/ CXL (IBM Coherent Accelerator Processor Interface CAPI) DRIVER -M: Frederic Barrat <fbarrat@linux.vnet.ibm.com> +M: Frederic Barrat <fbarrat@linux.ibm.com> M: Andrew Donnellan <andrew.donnellan@au1.ibm.com> L: linuxppc-dev@lists.ozlabs.org S: Supported @@ -4045,9 +4045,9 @@ F: Documentation/powerpc/cxl.txt F: Documentation/ABI/testing/sysfs-class-cxl CXLFLASH (IBM Coherent Accelerator Processor Interface CAPI Flash) SCSI DRIVER -M: Manoj N. Kumar <manoj@linux.vnet.ibm.com> -M: Matthew R. Ochs <mrochs@linux.vnet.ibm.com> -M: Uma Krishnan <ukrishn@linux.vnet.ibm.com> +M: Manoj N. Kumar <manoj@linux.ibm.com> +M: Matthew R. Ochs <mrochs@linux.ibm.com> +M: Uma Krishnan <ukrishn@linux.ibm.com> L: linux-scsi@vger.kernel.org S: Supported F: drivers/scsi/cxlflash/ @@ -5428,7 +5428,7 @@ S: Orphan F: fs/efs/ EHEA (IBM pSeries eHEA 10Gb ethernet adapter) DRIVER -M: Douglas Miller <dougmill@linux.vnet.ibm.com> +M: Douglas Miller <dougmill@linux.ibm.com> L: netdev@vger.kernel.org S: Maintained F: drivers/net/ethernet/ibm/ehea/ @@ -5565,7 +5565,7 @@ F: Documentation/filesystems/ext4/ext4.rst F: fs/ext4/ Extended Verification Module (EVM) -M: Mimi Zohar <zohar@linux.vnet.ibm.com> +M: Mimi Zohar <zohar@linux.ibm.com> L: linux-integrity@vger.kernel.org S: Supported F: security/integrity/evm/ @@ -5775,7 +5775,7 @@ F: include/linux/firmware.h FLASH ADAPTER DRIVER (IBM Flash Adapter 900GB Full Height PCI Flash Card) M: Joshua Morris <josh.h.morris@us.ibm.com> -M: Philip Kelleher <pjk1939@linux.vnet.ibm.com> +M: Philip Kelleher <pjk1939@linux.ibm.com> S: Maintained F: drivers/block/rsxx/ @@ -6042,7 +6042,7 @@ F: include/linux/fscrypt*.h F: Documentation/filesystems/fscrypt.rst FSI-ATTACHED I2C DRIVER -M: Eddie James <eajames@linux.vnet.ibm.com> +M: Eddie James <eajames@linux.ibm.com> L: linux-i2c@vger.kernel.org L: openbmc@lists.ozlabs.org (moderated for non-subscribers) S: Maintained @@ -6218,8 +6218,7 @@ S: Supported F: drivers/uio/uio_pci_generic.c GENWQE (IBM Generic Workqueue Card) -M: Frank Haverkamp <haver@linux.vnet.ibm.com> -M: Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com> +M: Frank Haverkamp <haver@linux.ibm.com> S: Supported F: drivers/misc/genwqe/ @@ -7001,8 +7000,7 @@ F: crypto/842.c F: lib/842/ IBM Power in-Nest Crypto Acceleration -M: Leonidas S. 
Barbosa <leosilva@linux.vnet.ibm.com> -M: Paulo Flabiano Smorigo <pfsmorigo@linux.vnet.ibm.com> +M: Paulo Flabiano Smorigo <pfsmorigo@linux.ibm.com> L: linux-crypto@vger.kernel.org S: Supported F: drivers/crypto/nx/Makefile @@ -7019,8 +7017,8 @@ S: Supported F: drivers/scsi/ipr.* IBM Power SRIOV Virtual NIC Device Driver -M: Thomas Falcon <tlfalcon@linux.vnet.ibm.com> -M: John Allen <jallen@linux.vnet.ibm.com> +M: Thomas Falcon <tlfalcon@linux.ibm.com> +M: John Allen <jallen@linux.ibm.com> L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/ibm/ibmvnic.* @@ -7035,41 +7033,38 @@ F: arch/powerpc/include/asm/vas.h F: arch/powerpc/include/uapi/asm/vas.h IBM Power Virtual Ethernet Device Driver -M: Thomas Falcon <tlfalcon@linux.vnet.ibm.com> +M: Thomas Falcon <tlfalcon@linux.ibm.com> L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/ibm/ibmveth.* IBM Power Virtual FC Device Drivers -M: Tyrel Datwyler <tyreld@linux.vnet.ibm.com> +M: Tyrel Datwyler <tyreld@linux.ibm.com> L: linux-scsi@vger.kernel.org S: Supported F: drivers/scsi/ibmvscsi/ibmvfc* IBM Power Virtual Management Channel Driver -M: Bryant G. Ly <bryantly@linux.vnet.ibm.com> -M: Steven Royer <seroyer@linux.vnet.ibm.com> +M: Steven Royer <seroyer@linux.ibm.com> S: Supported F: drivers/misc/ibmvmc.* IBM Power Virtual SCSI Device Drivers -M: Tyrel Datwyler <tyreld@linux.vnet.ibm.com> +M: Tyrel Datwyler <tyreld@linux.ibm.com> L: linux-scsi@vger.kernel.org S: Supported F: drivers/scsi/ibmvscsi/ibmvscsi* F: include/scsi/viosrp.h IBM Power Virtual SCSI Device Target Driver -M: Bryant G. Ly <bryantly@linux.vnet.ibm.com> -M: Michael Cyr <mikecyr@linux.vnet.ibm.com> +M: Michael Cyr <mikecyr@linux.ibm.com> L: linux-scsi@vger.kernel.org L: target-devel@vger.kernel.org S: Supported F: drivers/scsi/ibmvscsi_tgt/ IBM Power VMX Cryptographic instructions -M: Leonidas S. Barbosa <leosilva@linux.vnet.ibm.com> -M: Paulo Flabiano Smorigo <pfsmorigo@linux.vnet.ibm.com> +M: Paulo Flabiano Smorigo <pfsmorigo@linux.ibm.com> L: linux-crypto@vger.kernel.org S: Supported F: drivers/crypto/vmx/Makefile @@ -7346,7 +7341,7 @@ S: Maintained L: linux-crypto@vger.kernel.org INTEGRITY MEASUREMENT ARCHITECTURE (IMA) -M: Mimi Zohar <zohar@linux.vnet.ibm.com> +M: Mimi Zohar <zohar@linux.ibm.com> M: Dmitry Kasatkin <dmitry.kasatkin@gmail.com> L: linux-integrity@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity.git @@ -7938,9 +7933,8 @@ S: Maintained F: drivers/media/platform/rcar_jpu.c JSM Neo PCI based serial card -M: Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com> L: linux-serial@vger.kernel.org -S: Maintained +S: Orphan F: drivers/tty/serial/jsm/ K10TEMP HARDWARE MONITORING DRIVER @@ -8170,7 +8164,7 @@ F: include/uapi/linux/kexec.h F: kernel/kexec* KEYS-ENCRYPTED -M: Mimi Zohar <zohar@linux.vnet.ibm.com> +M: Mimi Zohar <zohar@linux.ibm.com> L: linux-integrity@vger.kernel.org L: keyrings@vger.kernel.org S: Supported @@ -8179,9 +8173,9 @@ F: include/keys/encrypted-type.h F: security/keys/encrypted-keys/ KEYS-TRUSTED -M: James Bottomley <jejb@linux.vnet.ibm.com> +M: James Bottomley <jejb@linux.ibm.com> M: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> -M: Mimi Zohar <zohar@linux.vnet.ibm.com> +M: Mimi Zohar <zohar@linux.ibm.com> L: linux-integrity@vger.kernel.org L: keyrings@vger.kernel.org S: Supported @@ -8234,7 +8228,7 @@ F: lib/test_kmod.c F: tools/testing/selftests/kmod/ KPROBES -M: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> +M: Naveen N. 
Rao <naveen.n.rao@linux.ibm.com> M: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> M: "David S. Miller" <davem@davemloft.net> M: Masami Hiramatsu <mhiramat@kernel.org> @@ -8590,7 +8584,7 @@ M: Nicholas Piggin <npiggin@gmail.com> M: David Howells <dhowells@redhat.com> M: Jade Alglave <j.alglave@ucl.ac.uk> M: Luc Maranget <luc.maranget@inria.fr> -M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> +M: "Paul E. McKenney" <paulmck@linux.ibm.com> R: Akira Yokosawa <akiyks@gmail.com> R: Daniel Lustig <dlustig@nvidia.com> L: linux-kernel@vger.kernel.org @@ -9548,7 +9542,7 @@ F: drivers/platform/x86/mlx-platform.c MEMBARRIER SUPPORT M: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> -M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> +M: "Paul E. McKenney" <paulmck@linux.ibm.com> L: linux-kernel@vger.kernel.org S: Supported F: kernel/sched/membarrier.c @@ -10686,7 +10680,7 @@ S: Supported F: tools/objtool/ OCXL (Open Coherent Accelerator Processor Interface OpenCAPI) DRIVER -M: Frederic Barrat <fbarrat@linux.vnet.ibm.com> +M: Frederic Barrat <fbarrat@linux.ibm.com> M: Andrew Donnellan <andrew.donnellan@au1.ibm.com> L: linuxppc-dev@lists.ozlabs.org S: Supported @@ -12487,7 +12481,7 @@ S: Orphan F: drivers/net/wireless/ray* RCUTORTURE TEST FRAMEWORK -M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> +M: "Paul E. McKenney" <paulmck@linux.ibm.com> M: Josh Triplett <josh@joshtriplett.org> R: Steven Rostedt <rostedt@goodmis.org> R: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> @@ -12534,11 +12528,12 @@ F: arch/x86/include/asm/intel_rdt_sched.h F: Documentation/x86/intel_rdt* READ-COPY UPDATE (RCU) -M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> +M: "Paul E. McKenney" <paulmck@linux.ibm.com> M: Josh Triplett <josh@joshtriplett.org> R: Steven Rostedt <rostedt@goodmis.org> R: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> R: Lai Jiangshan <jiangshanlai@gmail.com> +R: Joel Fernandes <joel@joelfernandes.org> L: linux-kernel@vger.kernel.org W: http://www.rdrop.com/users/paulmck/RCU/ S: Supported @@ -12674,7 +12669,7 @@ F: include/linux/reset-controller.h RESTARTABLE SEQUENCES SUPPORT M: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> M: Peter Zijlstra <peterz@infradead.org> -M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> +M: "Paul E. McKenney" <paulmck@linux.ibm.com> M: Boqun Feng <boqun.feng@gmail.com> L: linux-kernel@vger.kernel.org S: Supported @@ -13199,7 +13194,7 @@ F: drivers/scsi/sg.c F: include/scsi/sg.h SCSI SUBSYSTEM -M: "James E.J. Bottomley" <jejb@linux.vnet.ibm.com> +M: "James E.J. Bottomley" <jejb@linux.ibm.com> T: git git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi.git M: "Martin K. Petersen" <martin.petersen@oracle.com> T: git git://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git @@ -13634,7 +13629,7 @@ F: mm/sl?b* SLEEPABLE READ-COPY UPDATE (SRCU) M: Lai Jiangshan <jiangshanlai@gmail.com> -M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> +M: "Paul E. McKenney" <paulmck@linux.ibm.com> M: Josh Triplett <josh@joshtriplett.org> R: Steven Rostedt <rostedt@goodmis.org> R: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> @@ -15057,7 +15052,7 @@ F: drivers/platform/x86/topstar-laptop.c TORTURE-TEST MODULES M: Davidlohr Bueso <dave@stgolabs.net> -M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> +M: "Paul E. 
McKenney" <paulmck@linux.ibm.com> M: Josh Triplett <josh@joshtriplett.org> L: linux-kernel@vger.kernel.org S: Supported diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 8cf035e68378..4c01e9a01a74 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -289,7 +289,7 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte) (*batchp)->ptes[(*batchp)->index++] = hugepte; if ((*batchp)->index == HUGEPD_FREELIST_SIZE) { - call_rcu_sched(&(*batchp)->rcu, hugepd_free_rcu_callback); + call_rcu(&(*batchp)->rcu, hugepd_free_rcu_callback); *batchp = NULL; } put_cpu_var(hugepd_freelist_cur); diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 76d89ee8b428..da64e4b9324e 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -350,7 +350,7 @@ void tlb_table_flush(struct mmu_gather *tlb) struct mmu_table_batch **batch = &tlb->batch; if (*batch) { - call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); + call_rcu(&(*batch)->rcu, tlb_remove_table_rcu); *batch = NULL; } } diff --git a/arch/sparc/oprofile/init.c b/arch/sparc/oprofile/init.c index f9024bccff16..43730c9b1c86 100644 --- a/arch/sparc/oprofile/init.c +++ b/arch/sparc/oprofile/init.c @@ -53,7 +53,7 @@ static void timer_stop(void) { nmi_adjust_hz(1); unregister_die_notifier(&profile_timer_exceptions_nb); - synchronize_sched(); /* Allow already-started NMIs to complete. */ + synchronize_rcu(); /* Allow already-started NMIs to complete. */ } static int op_nmi_timer_init(struct oprofile_operations *ops) diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 8cd66152cdb0..9df652d3d927 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -59,7 +59,7 @@ static struct pcibios_fwaddrmap *pcibios_fwaddrmap_lookup(struct pci_dev *dev) { struct pcibios_fwaddrmap *map; - WARN_ON_SMP(!spin_is_locked(&pcibios_fwaddrmap_lock)); + lockdep_assert_held(&pcibios_fwaddrmap_lock); list_for_each_entry(map, &pcibios_fwaddrmappings, list) if (map->dev == dev) diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index f8ec3d4ba4a8..8eb3c4c9ff67 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c @@ -382,7 +382,7 @@ static int pcrypt_cpumask_change_notify(struct notifier_block *self, cpumask_copy(new_mask->mask, cpumask->cbcpu); rcu_assign_pointer(pcrypt->cb_cpumask, new_mask); - synchronize_rcu_bh(); + synchronize_rcu(); free_cpumask_var(old_mask->mask); kfree(old_mask); diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 677618e6f1f7..dc8603d34320 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -2187,7 +2187,7 @@ static void shutdown_smi(void *send_info) * handlers might have been running before we freed the * interrupt. 
*/ - synchronize_sched(); + synchronize_rcu(); /* * Timeouts are stopped, now make sure the interrupts are off diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 6d53f7d9fc7a..ffa9adeaba31 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -346,7 +346,7 @@ static inline void gov_clear_update_util(struct cpufreq_policy *policy) for_each_cpu(i, policy->cpus) cpufreq_remove_update_util_hook(i); - synchronize_sched(); + synchronize_rcu(); } static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *policy, diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 9578312e43f2..ed124d72db76 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1930,7 +1930,7 @@ static void intel_pstate_clear_update_util_hook(unsigned int cpu) cpufreq_remove_update_util_hook(cpu); cpu_data->update_util_set = false; - synchronize_sched(); + synchronize_rcu(); } static int intel_pstate_get_max_freq(struct cpudata *cpu) diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c index ffd68a7bc9e1..69d752f0b621 100644 --- a/drivers/net/ethernet/realtek/8139too.c +++ b/drivers/net/ethernet/realtek/8139too.c @@ -1661,7 +1661,7 @@ static void rtl8139_tx_timeout_task (struct work_struct *work) napi_disable(&tp->napi); netif_stop_queue(dev); - synchronize_sched(); + synchronize_rcu(); netdev_dbg(dev, "Transmit timeout, status %02x %04x %04x media %02x\n", RTL_R8(ChipCmd), RTL_R16(IntrStatus), diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 1fd01688d37b..4f1d89f0dc24 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -5866,7 +5866,7 @@ static void rtl_reset_work(struct rtl8169_private *tp) napi_disable(&tp->napi); netif_stop_queue(dev); - synchronize_sched(); + synchronize_rcu(); rtl8169_hw_reset(tp); @@ -6609,7 +6609,7 @@ static void rtl8169_down(struct net_device *dev) rtl8169_rx_missed(dev); /* Give a racing hard_start_xmit a few cycles to complete. 
*/ - synchronize_sched(); + synchronize_rcu(); rtl8169_tx_clear(tp); diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 98fe7e762e17..3643015a55cf 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -3167,7 +3167,7 @@ struct hlist_head *efx_rps_hash_bucket(struct efx_nic *efx, { u32 hash = efx_filter_spec_hash(spec); - WARN_ON(!spin_is_locked(&efx->rps_hash_lock)); + lockdep_assert_held(&efx->rps_hash_lock); if (!efx->rps_hash_table) return NULL; return &efx->rps_hash_table[hash % EFX_ARFS_HASH_TABLE_SIZE]; diff --git a/drivers/net/ethernet/sis/sis190.c b/drivers/net/ethernet/sis/sis190.c index c2c50522b96d..808cf9816673 100644 --- a/drivers/net/ethernet/sis/sis190.c +++ b/drivers/net/ethernet/sis/sis190.c @@ -1142,7 +1142,7 @@ static void sis190_down(struct net_device *dev) if (!poll_locked) poll_locked++; - synchronize_sched(); + synchronize_rcu(); } while (SIS_R32(IntrMask)); diff --git a/drivers/net/ethernet/smsc/smsc911x.h b/drivers/net/ethernet/smsc/smsc911x.h index 8d75508acd2b..51b2fc1a395f 100644 --- a/drivers/net/ethernet/smsc/smsc911x.h +++ b/drivers/net/ethernet/smsc/smsc911x.h @@ -67,7 +67,7 @@ #ifdef CONFIG_DEBUG_SPINLOCK #define SMSC_ASSERT_MAC_LOCK(pdata) \ - WARN_ON_SMP(!spin_is_locked(&pdata->mac_lock)) + lockdep_assert_held(&pdata->mac_lock) #else #define SMSC_ASSERT_MAC_LOCK(pdata) do {} while (0) #endif /* CONFIG_DEBUG_SPINLOCK */ diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index ab11b2bee273..564ead864028 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1359,7 +1359,7 @@ static int vhost_net_release(struct inode *inode, struct file *f) if (rx_sock) sockfd_put(rx_sock); /* Make sure no callbacks are outstanding */ - synchronize_rcu_bh(); + synchronize_rcu(); /* We do an extra flush before freeing memory, * since jobs can re-queue themselves. */ vhost_net_flush(n); diff --git a/fs/file.c b/fs/file.c index 7ffd6e9d103d..50304c7525ea 100644 --- a/fs/file.c +++ b/fs/file.c @@ -158,7 +158,7 @@ static int expand_fdtable(struct files_struct *files, unsigned int nr) * or have finished their rcu_read_lock_sched() section. */ if (atomic_read(&files->count) > 1) - synchronize_sched(); + synchronize_rcu(); spin_lock(&files->file_lock); if (!new_fdt) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 356d2b8568c1..681881dc8a9d 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -926,7 +926,7 @@ static inline struct userfaultfd_wait_queue *find_userfault_in( wait_queue_entry_t *wq; struct userfaultfd_wait_queue *uwq; - VM_BUG_ON(!spin_is_locked(&wqh->lock)); + lockdep_assert_held(&wqh->lock); uwq = NULL; if (!waitqueue_active(wqh)) diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 79b99d653e03..71b75643c432 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -41,7 +41,7 @@ static inline void percpu_down_read_preempt_disable(struct percpu_rw_semaphore * * cannot both change sem->state from readers_fast and start checking * counters while we are here. So if we see !sem->state, we know that * the writer won't be checking until we're past the preempt_enable() - * and that one the synchronize_sched() is done, the writer will see + * and that once the synchronize_rcu() is done, the writer will see * anything we did within this RCU-sched read-size critical section. 
*/ __this_cpu_inc(*sem->read_count); diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h index 8a16c3eb3dd0..c0578ba23c1a 100644 --- a/include/linux/rcupdate_wait.h +++ b/include/linux/rcupdate_wait.h @@ -31,21 +31,4 @@ do { \ #define wait_rcu_gp(...) _wait_rcu_gp(false, __VA_ARGS__) -/** - * synchronize_rcu_mult - Wait concurrently for multiple grace periods - * @...: List of call_rcu() functions for different grace periods to wait on - * - * This macro waits concurrently for multiple types of RCU grace periods. - * For example, synchronize_rcu_mult(call_rcu, call_rcu_tasks) would wait - * on concurrent RCU and RCU-tasks grace periods. Waiting on a give SRCU - * domain requires you to write a wrapper function for that SRCU domain's - * call_srcu() function, supplying the corresponding srcu_struct. - * - * If Tiny RCU, tell _wait_rcu_gp() does not bother waiting for RCU, - * given that anywhere synchronize_rcu_mult() can be called is automatically - * a grace period. - */ -#define synchronize_rcu_mult(...) \ - _wait_rcu_gp(IS_ENABLED(CONFIG_TINY_RCU), __VA_ARGS__) - #endif /* _LINUX_SCHED_RCUPDATE_WAIT_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index a51c13c2b1a0..e4c7b6241088 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -572,8 +572,10 @@ union rcu_special { struct { u8 blocked; u8 need_qs; + u8 exp_hint; /* Hint for performance. */ + u8 pad; /* No garbage from compiler! */ } b; /* Bits. */ - u16 s; /* Set of bits. */ + u32 s; /* Set of bits. */ }; enum perf_event_task_context { diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 67135d4a8a30..c614375cd264 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -38,20 +38,20 @@ struct srcu_struct; #ifdef CONFIG_DEBUG_LOCK_ALLOC -int __init_srcu_struct(struct srcu_struct *sp, const char *name, +int __init_srcu_struct(struct srcu_struct *ssp, const char *name, struct lock_class_key *key); -#define init_srcu_struct(sp) \ +#define init_srcu_struct(ssp) \ ({ \ static struct lock_class_key __srcu_key; \ \ - __init_srcu_struct((sp), #sp, &__srcu_key); \ + __init_srcu_struct((ssp), #ssp, &__srcu_key); \ }) #define __SRCU_DEP_MAP_INIT(srcu_name) .dep_map = { .name = #srcu_name }, #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ -int init_srcu_struct(struct srcu_struct *sp); +int init_srcu_struct(struct srcu_struct *ssp); #define __SRCU_DEP_MAP_INIT(srcu_name) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ @@ -67,28 +67,28 @@ int init_srcu_struct(struct srcu_struct *sp); struct srcu_struct { }; #endif -void call_srcu(struct srcu_struct *sp, struct rcu_head *head, +void call_srcu(struct srcu_struct *ssp, struct rcu_head *head, void (*func)(struct rcu_head *head)); -void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced); -int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp); -void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp); -void synchronize_srcu(struct srcu_struct *sp); +void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced); +int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp); +void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp); +void synchronize_srcu(struct srcu_struct *ssp); /** * cleanup_srcu_struct - deconstruct a sleep-RCU structure - * @sp: structure to clean up. + * @ssp: structure to clean up. * * Must invoke this after you are finished using a given srcu_struct that * was initialized via init_srcu_struct(), else you leak memory. 
*/ -static inline void cleanup_srcu_struct(struct srcu_struct *sp) +static inline void cleanup_srcu_struct(struct srcu_struct *ssp) { - _cleanup_srcu_struct(sp, false); + _cleanup_srcu_struct(ssp, false); } /** * cleanup_srcu_struct_quiesced - deconstruct a quiesced sleep-RCU structure - * @sp: structure to clean up. + * @ssp: structure to clean up. * * Must invoke this after you are finished using a given srcu_struct that * was initialized via init_srcu_struct(), else you leak memory. Also, @@ -103,16 +103,16 @@ static inline void cleanup_srcu_struct(struct srcu_struct *sp) * (with high probability, anyway), and will also cause the srcu_struct * to be leaked. */ -static inline void cleanup_srcu_struct_quiesced(struct srcu_struct *sp) +static inline void cleanup_srcu_struct_quiesced(struct srcu_struct *ssp) { - _cleanup_srcu_struct(sp, true); + _cleanup_srcu_struct(ssp, true); } #ifdef CONFIG_DEBUG_LOCK_ALLOC /** * srcu_read_lock_held - might we be in SRCU read-side critical section? - * @sp: The srcu_struct structure to check + * @ssp: The srcu_struct structure to check * * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an SRCU * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, @@ -126,16 +126,16 @@ static inline void cleanup_srcu_struct_quiesced(struct srcu_struct *sp) * relies on normal RCU, it can be called from the CPU which * is in the idle loop from an RCU point of view or offline. */ -static inline int srcu_read_lock_held(const struct srcu_struct *sp) +static inline int srcu_read_lock_held(const struct srcu_struct *ssp) { if (!debug_lockdep_rcu_enabled()) return 1; - return lock_is_held(&sp->dep_map); + return lock_is_held(&ssp->dep_map); } #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ -static inline int srcu_read_lock_held(const struct srcu_struct *sp) +static inline int srcu_read_lock_held(const struct srcu_struct *ssp) { return 1; } @@ -145,7 +145,7 @@ static inline int srcu_read_lock_held(const struct srcu_struct *sp) /** * srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing * @p: the pointer to fetch and protect for later dereferencing - * @sp: pointer to the srcu_struct, which is used to check that we + * @ssp: pointer to the srcu_struct, which is used to check that we * really are in an SRCU read-side critical section. * @c: condition to check for update-side use * @@ -154,29 +154,32 @@ static inline int srcu_read_lock_held(const struct srcu_struct *sp) * to 1. The @c argument will normally be a logical expression containing * lockdep_is_held() calls. */ -#define srcu_dereference_check(p, sp, c) \ - __rcu_dereference_check((p), (c) || srcu_read_lock_held(sp), __rcu) +#define srcu_dereference_check(p, ssp, c) \ + __rcu_dereference_check((p), (c) || srcu_read_lock_held(ssp), __rcu) /** * srcu_dereference - fetch SRCU-protected pointer for later dereferencing * @p: the pointer to fetch and protect for later dereferencing - * @sp: pointer to the srcu_struct, which is used to check that we + * @ssp: pointer to the srcu_struct, which is used to check that we * really are in an SRCU read-side critical section. * * Makes rcu_dereference_check() do the dirty work. If PROVE_RCU * is enabled, invoking this outside of an RCU read-side critical * section will result in an RCU-lockdep splat. 
*/ -#define srcu_dereference(p, sp) srcu_dereference_check((p), (sp), 0) +#define srcu_dereference(p, ssp) srcu_dereference_check((p), (ssp), 0) /** * srcu_dereference_notrace - no tracing and no lockdep calls from here + * @p: the pointer to fetch and protect for later dereferencing + * @ssp: pointer to the srcu_struct, which is used to check that we + * really are in an SRCU read-side critical section. */ -#define srcu_dereference_notrace(p, sp) srcu_dereference_check((p), (sp), 1) +#define srcu_dereference_notrace(p, ssp) srcu_dereference_check((p), (ssp), 1) /** * srcu_read_lock - register a new reader for an SRCU-protected structure. - * @sp: srcu_struct in which to register the new reader. + * @ssp: srcu_struct in which to register the new reader. * * Enter an SRCU read-side critical section. Note that SRCU read-side * critical sections may be nested. However, it is illegal to @@ -191,44 +194,44 @@ static inline int srcu_read_lock_held(const struct srcu_struct *sp) * srcu_read_unlock() in an irq handler if the matching srcu_read_lock() * was invoked in process context. */ -static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) +static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp) { int retval; - retval = __srcu_read_lock(sp); - rcu_lock_acquire(&(sp)->dep_map); + retval = __srcu_read_lock(ssp); + rcu_lock_acquire(&(ssp)->dep_map); return retval; } /* Used by tracing, cannot be traced and cannot invoke lockdep. */ static inline notrace int -srcu_read_lock_notrace(struct srcu_struct *sp) __acquires(sp) +srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp) { int retval; - retval = __srcu_read_lock(sp); + retval = __srcu_read_lock(ssp); return retval; } /** * srcu_read_unlock - unregister a old reader from an SRCU-protected structure. - * @sp: srcu_struct in which to unregister the old reader. + * @ssp: srcu_struct in which to unregister the old reader. * @idx: return value from corresponding srcu_read_lock(). * * Exit an SRCU read-side critical section. */ -static inline void srcu_read_unlock(struct srcu_struct *sp, int idx) - __releases(sp) +static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx) + __releases(ssp) { - rcu_lock_release(&(sp)->dep_map); - __srcu_read_unlock(sp, idx); + rcu_lock_release(&(ssp)->dep_map); + __srcu_read_unlock(ssp, idx); } /* Used by tracing, cannot be traced and cannot call lockdep. */ static inline notrace void -srcu_read_unlock_notrace(struct srcu_struct *sp, int idx) __releases(sp) +srcu_read_unlock_notrace(struct srcu_struct *ssp, int idx) __releases(ssp) { - __srcu_read_unlock(sp, idx); + __srcu_read_unlock(ssp, idx); } /** diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index f41d2fb09f87..b19216aaaef2 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -60,7 +60,7 @@ void srcu_drive_gp(struct work_struct *wp); #define DEFINE_STATIC_SRCU(name) \ static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name) -void synchronize_srcu(struct srcu_struct *sp); +void synchronize_srcu(struct srcu_struct *ssp); /* * Counts the new reader in the appropriate per-CPU element of the @@ -68,36 +68,36 @@ void synchronize_srcu(struct srcu_struct *sp); * __srcu_read_unlock() must be in the same handler instance. Returns an * index that must be passed to the matching srcu_read_unlock(). 
*/ -static inline int __srcu_read_lock(struct srcu_struct *sp) +static inline int __srcu_read_lock(struct srcu_struct *ssp) { int idx; - idx = READ_ONCE(sp->srcu_idx); - WRITE_ONCE(sp->srcu_lock_nesting[idx], sp->srcu_lock_nesting[idx] + 1); + idx = READ_ONCE(ssp->srcu_idx); + WRITE_ONCE(ssp->srcu_lock_nesting[idx], ssp->srcu_lock_nesting[idx] + 1); return idx; } -static inline void synchronize_srcu_expedited(struct srcu_struct *sp) +static inline void synchronize_srcu_expedited(struct srcu_struct *ssp) { - synchronize_srcu(sp); + synchronize_srcu(ssp); } -static inline void srcu_barrier(struct srcu_struct *sp) +static inline void srcu_barrier(struct srcu_struct *ssp) { - synchronize_srcu(sp); + synchronize_srcu(ssp); } /* Defined here to avoid size increase for non-torture kernels. */ -static inline void srcu_torture_stats_print(struct srcu_struct *sp, +static inline void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf) { int idx; - idx = READ_ONCE(sp->srcu_idx) & 0x1; + idx = READ_ONCE(ssp->srcu_idx) & 0x1; pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%hd,%hd)\n", tt, tf, idx, - READ_ONCE(sp->srcu_lock_nesting[!idx]), - READ_ONCE(sp->srcu_lock_nesting[idx])); + READ_ONCE(ssp->srcu_lock_nesting[!idx]), + READ_ONCE(ssp->srcu_lock_nesting[idx])); } #endif diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 0ae91b3a7406..6f292bd3e7db 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -51,7 +51,7 @@ struct srcu_data { unsigned long grpmask; /* Mask for leaf srcu_node */ /* ->srcu_data_have_cbs[]. */ int cpu; - struct srcu_struct *sp; + struct srcu_struct *ssp; }; /* @@ -138,8 +138,8 @@ struct srcu_struct { #define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */) #define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static) -void synchronize_srcu_expedited(struct srcu_struct *sp); -void srcu_barrier(struct srcu_struct *sp); -void srcu_torture_stats_print(struct srcu_struct *sp, char *tt, char *tf); +void synchronize_srcu_expedited(struct srcu_struct *ssp); +void srcu_barrier(struct srcu_struct *ssp); +void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf); #endif diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 538ba1a58f5b..432080b59c26 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -82,7 +82,7 @@ int unregister_tracepoint_module_notifier(struct notifier_block *nb) static inline void tracepoint_synchronize_unregister(void) { synchronize_srcu(&tracepoint_srcu); - synchronize_sched(); + synchronize_rcu(); } #else static inline void tracepoint_synchronize_unregister(void) diff --git a/include/linux/types.h b/include/linux/types.h index 9834e90aa010..c2615d6a019e 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -212,8 +212,8 @@ struct ustat { * weird ABI and we need to ask it explicitly. * * The alignment is required to guarantee that bit 0 of @next will be - * clear under normal conditions -- as long as we use call_rcu(), - * call_rcu_bh(), call_rcu_sched(), or call_srcu() to queue callback. + * clear under normal conditions -- as long as we use call_rcu() or + * call_srcu() to queue the callback. 
* * This guarantee is important for few reasons: * - future call_rcu_lazy() will make use of lower bits in the pointer; diff --git a/init/main.c b/init/main.c index ee147103ba1b..a45486330243 100644 --- a/init/main.c +++ b/init/main.c @@ -1046,12 +1046,12 @@ static void mark_readonly(void) { if (rodata_enabled) { /* - * load_module() results in W+X mappings, which are cleaned up - * with call_rcu_sched(). Let's make sure that queued work is + * load_module() results in W+X mappings, which are cleaned + * up with call_rcu(). Let's make sure that queued work is * flushed so that we don't hit false positives looking for * insecure pages which are W+X. */ - rcu_barrier_sched(); + rcu_barrier(); mark_rodata_ro(); rodata_test(); } else diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 6aaf5dd5383b..7a8429f8e280 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5343,7 +5343,7 @@ int __init cgroup_init(void) cgroup_rstat_boot(); /* - * The latency of the synchronize_sched() is too high for cgroups, + * The latency of the synchronize_rcu() is too high for cgroups, * avoid it at the cost of forcing all readers into the slow path. */ rcu_sync_enter_start(&cgroup_threadgroup_rwsem.rss); diff --git a/kernel/events/core.c b/kernel/events/core.c index 84530ab358c3..c4b90cf7734a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9918,7 +9918,7 @@ static void account_event(struct perf_event *event) * call the perf scheduling hooks before proceeding to * install events that need them. */ - synchronize_sched(); + synchronize_rcu(); } /* * Now that we have waited for the sync_sched(), allow further diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 90e98e233647..08e31d863191 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -229,7 +229,7 @@ static int collect_garbage_slots(struct kprobe_insn_cache *c) struct kprobe_insn_page *kip, *next; /* Ensure no-one is interrupted on the garbages */ - synchronize_sched(); + synchronize_rcu(); list_for_each_entry_safe(kip, next, &c->pages, list) { int i; @@ -1382,7 +1382,7 @@ out: if (ret) { ap->flags |= KPROBE_FLAG_DISABLED; list_del_rcu(&p->list); - synchronize_sched(); + synchronize_rcu(); } } } @@ -1597,7 +1597,7 @@ int register_kprobe(struct kprobe *p) ret = arm_kprobe(p); if (ret) { hlist_del_rcu(&p->hlist); - synchronize_sched(); + synchronize_rcu(); goto out; } } @@ -1776,7 +1776,7 @@ void unregister_kprobes(struct kprobe **kps, int num) kps[i]->addr = NULL; mutex_unlock(&kprobe_mutex); - synchronize_sched(); + synchronize_rcu(); for (i = 0; i < num; i++) if (kps[i]->addr) __unregister_kprobe_bottom(kps[i]); @@ -1966,7 +1966,7 @@ void unregister_kretprobes(struct kretprobe **rps, int num) rps[i]->kp.addr = NULL; mutex_unlock(&kprobe_mutex); - synchronize_sched(); + synchronize_rcu(); for (i = 0; i < num; i++) { if (rps[i]->kp.addr) { __unregister_kprobe_bottom(&rps[i]->kp); diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c index 82d584225dc6..7702cb4064fc 100644 --- a/kernel/livepatch/patch.c +++ b/kernel/livepatch/patch.c @@ -61,7 +61,7 @@ static void notrace klp_ftrace_handler(unsigned long ip, ops = container_of(fops, struct klp_ops, fops); /* - * A variant of synchronize_sched() is used to allow patching functions + * A variant of synchronize_rcu() is used to allow patching functions * where RCU is not watching, see klp_synchronize_transition(). 
*/ preempt_disable_notrace(); @@ -72,7 +72,7 @@ static void notrace klp_ftrace_handler(unsigned long ip, /* * func should never be NULL because preemption should be disabled here * and unregister_ftrace_function() does the equivalent of a - * synchronize_sched() before the func_stack removal. + * synchronize_rcu() before the func_stack removal. */ if (WARN_ON_ONCE(!func)) goto unlock; diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c index 5bc349805e03..304d5eb8a98c 100644 --- a/kernel/livepatch/transition.c +++ b/kernel/livepatch/transition.c @@ -52,7 +52,7 @@ static DECLARE_DELAYED_WORK(klp_transition_work, klp_transition_work_fn); /* * This function is just a stub to implement a hard force - * of synchronize_sched(). This requires synchronizing + * of synchronize_rcu(). This requires synchronizing * tasks even in userspace and idle. */ static void klp_sync(struct work_struct *work) @@ -175,7 +175,7 @@ void klp_cancel_transition(void) void klp_update_patch_state(struct task_struct *task) { /* - * A variant of synchronize_sched() is used to allow patching functions + * A variant of synchronize_rcu() is used to allow patching functions * where RCU is not watching, see klp_synchronize_transition(). */ preempt_disable_notrace(); diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 1efada2dd9dd..ef27f98714c0 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -4195,7 +4195,7 @@ void lockdep_free_key_range(void *start, unsigned long size) * * sync_sched() is sufficient because the read-side is IRQ disable. */ - synchronize_sched(); + synchronize_rcu(); /* * XXX at this point we could return the resources to the pool; diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c index 9aa713629387..771d4ca96dda 100644 --- a/kernel/locking/mutex-debug.c +++ b/kernel/locking/mutex-debug.c @@ -36,7 +36,7 @@ void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter) void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter) { - SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock)); + lockdep_assert_held(&lock->wait_lock); DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list)); DEBUG_LOCKS_WARN_ON(waiter->magic != waiter); DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list)); @@ -51,7 +51,7 @@ void debug_mutex_free_waiter(struct mutex_waiter *waiter) void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, struct task_struct *task) { - SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock)); + lockdep_assert_held(&lock->wait_lock); /* Mark the current thread as blocked on the lock: */ task->blocked_on = waiter; diff --git a/kernel/module.c b/kernel/module.c index 49a405891587..99b46c32d579 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2159,7 +2159,7 @@ static void free_module(struct module *mod) /* Remove this module from bug list, this uses list_del_rcu */ module_bug_cleanup(mod); /* Wait for RCU-sched synchronizing before releasing mod->list and buglist. */ - synchronize_sched(); + synchronize_rcu(); mutex_unlock(&module_mutex); /* This may be empty, but that's OK */ @@ -3507,15 +3507,15 @@ static noinline int do_init_module(struct module *mod) /* * We want to free module_init, but be aware that kallsyms may be * walking this with preempt disabled. In all the failure paths, we - * call synchronize_sched(), but we don't want to slow down the success + * call synchronize_rcu(), but we don't want to slow down the success * path, so use actual RCU here. 
* Note that module_alloc() on most architectures creates W+X page * mappings which won't be cleaned up until do_free_init() runs. Any * code such as mark_rodata_ro() which depends on those mappings to * be cleaned up needs to sync with the queued work - ie - * rcu_barrier_sched() + * rcu_barrier() */ - call_rcu_sched(&freeinit->rcu, do_free_init); + call_rcu(&freeinit->rcu, do_free_init); mutex_unlock(&module_mutex); wake_up_all(&module_wq); @@ -3526,7 +3526,7 @@ fail_free_freeinit: fail: /* Try to protect us from buggy refcounters. */ mod->state = MODULE_STATE_GOING; - synchronize_sched(); + synchronize_rcu(); module_put(mod); blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GOING, mod); @@ -3819,7 +3819,7 @@ static int load_module(struct load_info *info, const char __user *uargs, ddebug_cleanup: ftrace_release_mod(mod); dynamic_debug_remove(mod, info->debug); - synchronize_sched(); + synchronize_rcu(); kfree(mod->args); free_arch_cleanup: module_arch_cleanup(mod); @@ -3834,7 +3834,7 @@ static int load_module(struct load_info *info, const char __user *uargs, mod_tree_remove(mod); wake_up_all(&module_wq); /* Wait for RCU-sched synchronizing before releasing mod->list. */ - synchronize_sched(); + synchronize_rcu(); mutex_unlock(&module_mutex); free_module: /* Free lock-classes; relies on the preceding sync_rcu() */ diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index b46e6683f8c9..32dfd6522548 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c @@ -37,30 +37,30 @@ int rcu_scheduler_active __read_mostly; static LIST_HEAD(srcu_boot_list); static bool srcu_init_done; -static int init_srcu_struct_fields(struct srcu_struct *sp) +static int init_srcu_struct_fields(struct srcu_struct *ssp) { - sp->srcu_lock_nesting[0] = 0; - sp->srcu_lock_nesting[1] = 0; - init_swait_queue_head(&sp->srcu_wq); - sp->srcu_cb_head = NULL; - sp->srcu_cb_tail = &sp->srcu_cb_head; - sp->srcu_gp_running = false; - sp->srcu_gp_waiting = false; - sp->srcu_idx = 0; - INIT_WORK(&sp->srcu_work, srcu_drive_gp); - INIT_LIST_HEAD(&sp->srcu_work.entry); + ssp->srcu_lock_nesting[0] = 0; + ssp->srcu_lock_nesting[1] = 0; + init_swait_queue_head(&ssp->srcu_wq); + ssp->srcu_cb_head = NULL; + ssp->srcu_cb_tail = &ssp->srcu_cb_head; + ssp->srcu_gp_running = false; + ssp->srcu_gp_waiting = false; + ssp->srcu_idx = 0; + INIT_WORK(&ssp->srcu_work, srcu_drive_gp); + INIT_LIST_HEAD(&ssp->srcu_work.entry); return 0; } #ifdef CONFIG_DEBUG_LOCK_ALLOC -int __init_srcu_struct(struct srcu_struct *sp, const char *name, +int __init_srcu_struct(struct srcu_struct *ssp, const char *name, struct lock_class_key *key) { /* Don't re-initialize a lock while it is held. */ - debug_check_no_locks_freed((void *)sp, sizeof(*sp)); - lockdep_init_map(&sp->dep_map, name, key, 0); - return init_srcu_struct_fields(sp); + debug_check_no_locks_freed((void *)ssp, sizeof(*ssp)); + lockdep_init_map(&ssp->dep_map, name, key, 0); + return init_srcu_struct_fields(ssp); } EXPORT_SYMBOL_GPL(__init_srcu_struct); @@ -68,15 +68,15 @@ EXPORT_SYMBOL_GPL(__init_srcu_struct); /* * init_srcu_struct - initialize a sleep-RCU structure - * @sp: structure to initialize. + * @ssp: structure to initialize. * * Must invoke this on a given srcu_struct before passing that srcu_struct * to any other function. Each srcu_struct represents a separate domain * of SRCU protection. 
*/ -int init_srcu_struct(struct srcu_struct *sp) +int init_srcu_struct(struct srcu_struct *ssp) { - return init_srcu_struct_fields(sp); + return init_srcu_struct_fields(ssp); } EXPORT_SYMBOL_GPL(init_srcu_struct); @@ -84,22 +84,22 @@ EXPORT_SYMBOL_GPL(init_srcu_struct); /* * cleanup_srcu_struct - deconstruct a sleep-RCU structure - * @sp: structure to clean up. + * @ssp: structure to clean up. * * Must invoke this after you are finished using a given srcu_struct that * was initialized via init_srcu_struct(), else you leak memory. */ -void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced) +void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced) { - WARN_ON(sp->srcu_lock_nesting[0] || sp->srcu_lock_nesting[1]); + WARN_ON(ssp->srcu_lock_nesting[0] || ssp->srcu_lock_nesting[1]); if (quiesced) - WARN_ON(work_pending(&sp->srcu_work)); + WARN_ON(work_pending(&ssp->srcu_work)); else - flush_work(&sp->srcu_work); - WARN_ON(sp->srcu_gp_running); - WARN_ON(sp->srcu_gp_waiting); - WARN_ON(sp->srcu_cb_head); - WARN_ON(&sp->srcu_cb_head != sp->srcu_cb_tail); + flush_work(&ssp->srcu_work); + WARN_ON(ssp->srcu_gp_running); + WARN_ON(ssp->srcu_gp_waiting); + WARN_ON(ssp->srcu_cb_head); + WARN_ON(&ssp->srcu_cb_head != ssp->srcu_cb_tail); } EXPORT_SYMBOL_GPL(_cleanup_srcu_struct); @@ -107,13 +107,13 @@ EXPORT_SYMBOL_GPL(_cleanup_srcu_struct); * Removes the count for the old reader from the appropriate element of * the srcu_struct. */ -void __srcu_read_unlock(struct srcu_struct *sp, int idx) +void __srcu_read_unlock(struct srcu_struct *ssp, int idx) { - int newval = sp->srcu_lock_nesting[idx] - 1; + int newval = ssp->srcu_lock_nesting[idx] - 1; - WRITE_ONCE(sp->srcu_lock_nesting[idx], newval); - if (!newval && READ_ONCE(sp->srcu_gp_waiting)) - swake_up_one(&sp->srcu_wq); + WRITE_ONCE(ssp->srcu_lock_nesting[idx], newval); + if (!newval && READ_ONCE(ssp->srcu_gp_waiting)) + swake_up_one(&ssp->srcu_wq); } EXPORT_SYMBOL_GPL(__srcu_read_unlock); @@ -127,24 +127,24 @@ void srcu_drive_gp(struct work_struct *wp) int idx; struct rcu_head *lh; struct rcu_head *rhp; - struct srcu_struct *sp; + struct srcu_struct *ssp; - sp = container_of(wp, struct srcu_struct, srcu_work); - if (sp->srcu_gp_running || !READ_ONCE(sp->srcu_cb_head)) + ssp = container_of(wp, struct srcu_struct, srcu_work); + if (ssp->srcu_gp_running || !READ_ONCE(ssp->srcu_cb_head)) return; /* Already running or nothing to do. */ /* Remove recently arrived callbacks and wait for readers. */ - WRITE_ONCE(sp->srcu_gp_running, true); + WRITE_ONCE(ssp->srcu_gp_running, true); local_irq_disable(); - lh = sp->srcu_cb_head; - sp->srcu_cb_head = NULL; - sp->srcu_cb_tail = &sp->srcu_cb_head; + lh = ssp->srcu_cb_head; + ssp->srcu_cb_head = NULL; + ssp->srcu_cb_tail = &ssp->srcu_cb_head; local_irq_enable(); - idx = sp->srcu_idx; - WRITE_ONCE(sp->srcu_idx, !sp->srcu_idx); - WRITE_ONCE(sp->srcu_gp_waiting, true); /* srcu_read_unlock() wakes! */ - swait_event_exclusive(sp->srcu_wq, !READ_ONCE(sp->srcu_lock_nesting[idx])); - WRITE_ONCE(sp->srcu_gp_waiting, false); /* srcu_read_unlock() cheap. */ + idx = ssp->srcu_idx; + WRITE_ONCE(ssp->srcu_idx, !ssp->srcu_idx); + WRITE_ONCE(ssp->srcu_gp_waiting, true); /* srcu_read_unlock() wakes! */ + swait_event_exclusive(ssp->srcu_wq, !READ_ONCE(ssp->srcu_lock_nesting[idx])); + WRITE_ONCE(ssp->srcu_gp_waiting, false); /* srcu_read_unlock() cheap. */ /* Invoke the callbacks we removed above. 
*/ while (lh) { @@ -161,9 +161,9 @@ void srcu_drive_gp(struct work_struct *wp) * at interrupt level, but the ->srcu_gp_running checks will * straighten that out. */ - WRITE_ONCE(sp->srcu_gp_running, false); - if (READ_ONCE(sp->srcu_cb_head)) - schedule_work(&sp->srcu_work); + WRITE_ONCE(ssp->srcu_gp_running, false); + if (READ_ONCE(ssp->srcu_cb_head)) + schedule_work(&ssp->srcu_work); } EXPORT_SYMBOL_GPL(srcu_drive_gp); @@ -171,7 +171,7 @@ EXPORT_SYMBOL_GPL(srcu_drive_gp); * Enqueue an SRCU callback on the specified srcu_struct structure, * initiating grace-period processing if it is not already running. */ -void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, +void call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp, rcu_callback_t func) { unsigned long flags; @@ -179,14 +179,14 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, rhp->func = func; rhp->next = NULL; local_irq_save(flags); - *sp->srcu_cb_tail = rhp; - sp->srcu_cb_tail = &rhp->next; + *ssp->srcu_cb_tail = rhp; + ssp->srcu_cb_tail = &rhp->next; local_irq_restore(flags); - if (!READ_ONCE(sp->srcu_gp_running)) { + if (!READ_ONCE(ssp->srcu_gp_running)) { if (likely(srcu_init_done)) - schedule_work(&sp->srcu_work); - else if (list_empty(&sp->srcu_work.entry)) - list_add(&sp->srcu_work.entry, &srcu_boot_list); + schedule_work(&ssp->srcu_work); + else if (list_empty(&ssp->srcu_work.entry)) + list_add(&ssp->srcu_work.entry, &srcu_boot_list); } } EXPORT_SYMBOL_GPL(call_srcu); @@ -194,13 +194,13 @@ EXPORT_SYMBOL_GPL(call_srcu); /* * synchronize_srcu - wait for prior SRCU read-side critical-section completion */ -void synchronize_srcu(struct srcu_struct *sp) +void synchronize_srcu(struct srcu_struct *ssp) { struct rcu_synchronize rs; init_rcu_head_on_stack(&rs.head); init_completion(&rs.completion); - call_srcu(sp, &rs.head, wakeme_after_rcu); + call_srcu(ssp, &rs.head, wakeme_after_rcu); wait_for_completion(&rs.completion); destroy_rcu_head_on_stack(&rs.head); } @@ -219,13 +219,13 @@ void __init rcu_scheduler_starting(void) */ void __init srcu_init(void) { - struct srcu_struct *sp; + struct srcu_struct *ssp; srcu_init_done = true; while (!list_empty(&srcu_boot_list)) { - sp = list_first_entry(&srcu_boot_list, + ssp = list_first_entry(&srcu_boot_list, struct srcu_struct, srcu_work.entry); - list_del_init(&sp->srcu_work.entry); - schedule_work(&sp->srcu_work); + list_del_init(&ssp->srcu_work.entry); + schedule_work(&ssp->srcu_work); } } diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index a8846ed7f352..3600d88d8956 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -56,7 +56,7 @@ static LIST_HEAD(srcu_boot_list); static bool __read_mostly srcu_init_done; static void srcu_invoke_callbacks(struct work_struct *work); -static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay); +static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay); static void process_srcu(struct work_struct *work); /* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */ @@ -92,7 +92,7 @@ do { \ * srcu_read_unlock() running against them. So if the is_static parameter * is set, don't initialize ->srcu_lock_count[] and ->srcu_unlock_count[]. 
*/ -static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) +static void init_srcu_struct_nodes(struct srcu_struct *ssp, bool is_static) { int cpu; int i; @@ -103,13 +103,13 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) struct srcu_node *snp_first; /* Work out the overall tree geometry. */ - sp->level[0] = &sp->node[0]; + ssp->level[0] = &ssp->node[0]; for (i = 1; i < rcu_num_lvls; i++) - sp->level[i] = sp->level[i - 1] + num_rcu_lvl[i - 1]; + ssp->level[i] = ssp->level[i - 1] + num_rcu_lvl[i - 1]; rcu_init_levelspread(levelspread, num_rcu_lvl); /* Each pass through this loop initializes one srcu_node structure. */ - srcu_for_each_node_breadth_first(sp, snp) { + srcu_for_each_node_breadth_first(ssp, snp) { spin_lock_init(&ACCESS_PRIVATE(snp, lock)); WARN_ON_ONCE(ARRAY_SIZE(snp->srcu_have_cbs) != ARRAY_SIZE(snp->srcu_data_have_cbs)); @@ -120,17 +120,17 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) snp->srcu_gp_seq_needed_exp = 0; snp->grplo = -1; snp->grphi = -1; - if (snp == &sp->node[0]) { + if (snp == &ssp->node[0]) { /* Root node, special case. */ snp->srcu_parent = NULL; continue; } /* Non-root node. */ - if (snp == sp->level[level + 1]) + if (snp == ssp->level[level + 1]) level++; - snp->srcu_parent = sp->level[level - 1] + - (snp - sp->level[level]) / + snp->srcu_parent = ssp->level[level - 1] + + (snp - ssp->level[level]) / levelspread[level - 1]; } @@ -141,14 +141,14 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) WARN_ON_ONCE(ARRAY_SIZE(sdp->srcu_lock_count) != ARRAY_SIZE(sdp->srcu_unlock_count)); level = rcu_num_lvls - 1; - snp_first = sp->level[level]; + snp_first = ssp->level[level]; for_each_possible_cpu(cpu) { - sdp = per_cpu_ptr(sp->sda, cpu); + sdp = per_cpu_ptr(ssp->sda, cpu); spin_lock_init(&ACCESS_PRIVATE(sdp, lock)); rcu_segcblist_init(&sdp->srcu_cblist); sdp->srcu_cblist_invoking = false; - sdp->srcu_gp_seq_needed = sp->srcu_gp_seq; - sdp->srcu_gp_seq_needed_exp = sp->srcu_gp_seq; + sdp->srcu_gp_seq_needed = ssp->srcu_gp_seq; + sdp->srcu_gp_seq_needed_exp = ssp->srcu_gp_seq; sdp->mynode = &snp_first[cpu / levelspread[level]]; for (snp = sdp->mynode; snp != NULL; snp = snp->srcu_parent) { if (snp->grplo < 0) @@ -157,7 +157,7 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) } sdp->cpu = cpu; INIT_DELAYED_WORK(&sdp->work, srcu_invoke_callbacks); - sdp->sp = sp; + sdp->ssp = ssp; sdp->grpmask = 1 << (cpu - sdp->mynode->grplo); if (is_static) continue; @@ -176,35 +176,35 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) * parameter is passed through to init_srcu_struct_nodes(), and * also tells us that ->sda has already been wired up to srcu_data. 
*/ -static int init_srcu_struct_fields(struct srcu_struct *sp, bool is_static) +static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) { - mutex_init(&sp->srcu_cb_mutex); - mutex_init(&sp->srcu_gp_mutex); - sp->srcu_idx = 0; - sp->srcu_gp_seq = 0; - sp->srcu_barrier_seq = 0; - mutex_init(&sp->srcu_barrier_mutex); - atomic_set(&sp->srcu_barrier_cpu_cnt, 0); - INIT_DELAYED_WORK(&sp->work, process_srcu); + mutex_init(&ssp->srcu_cb_mutex); + mutex_init(&ssp->srcu_gp_mutex); + ssp->srcu_idx = 0; + ssp->srcu_gp_seq = 0; + ssp->srcu_barrier_seq = 0; + mutex_init(&ssp->srcu_barrier_mutex); + atomic_set(&ssp->srcu_barrier_cpu_cnt, 0); + INIT_DELAYED_WORK(&ssp->work, process_srcu); if (!is_static) - sp->sda = alloc_percpu(struct srcu_data); - init_srcu_struct_nodes(sp, is_static); - sp->srcu_gp_seq_needed_exp = 0; - sp->srcu_last_gp_end = ktime_get_mono_fast_ns(); - smp_store_release(&sp->srcu_gp_seq_needed, 0); /* Init done. */ - return sp->sda ? 0 : -ENOMEM; + ssp->sda = alloc_percpu(struct srcu_data); + init_srcu_struct_nodes(ssp, is_static); + ssp->srcu_gp_seq_needed_exp = 0; + ssp->srcu_last_gp_end = ktime_get_mono_fast_ns(); + smp_store_release(&ssp->srcu_gp_seq_needed, 0); /* Init done. */ + return ssp->sda ? 0 : -ENOMEM; } #ifdef CONFIG_DEBUG_LOCK_ALLOC -int __init_srcu_struct(struct srcu_struct *sp, const char *name, +int __init_srcu_struct(struct srcu_struct *ssp, const char *name, struct lock_class_key *key) { /* Don't re-initialize a lock while it is held. */ - debug_check_no_locks_freed((void *)sp, sizeof(*sp)); - lockdep_init_map(&sp->dep_map, name, key, 0); - spin_lock_init(&ACCESS_PRIVATE(sp, lock)); - return init_srcu_struct_fields(sp, false); + debug_check_no_locks_freed((void *)ssp, sizeof(*ssp)); + lockdep_init_map(&ssp->dep_map, name, key, 0); + spin_lock_init(&ACCESS_PRIVATE(ssp, lock)); + return init_srcu_struct_fields(ssp, false); } EXPORT_SYMBOL_GPL(__init_srcu_struct); @@ -212,16 +212,16 @@ EXPORT_SYMBOL_GPL(__init_srcu_struct); /** * init_srcu_struct - initialize a sleep-RCU structure - * @sp: structure to initialize. + * @ssp: structure to initialize. * * Must invoke this on a given srcu_struct before passing that srcu_struct * to any other function. Each srcu_struct represents a separate domain * of SRCU protection. */ -int init_srcu_struct(struct srcu_struct *sp) +int init_srcu_struct(struct srcu_struct *ssp) { - spin_lock_init(&ACCESS_PRIVATE(sp, lock)); - return init_srcu_struct_fields(sp, false); + spin_lock_init(&ACCESS_PRIVATE(ssp, lock)); + return init_srcu_struct_fields(ssp, false); } EXPORT_SYMBOL_GPL(init_srcu_struct); @@ -231,37 +231,37 @@ EXPORT_SYMBOL_GPL(init_srcu_struct); * First-use initialization of statically allocated srcu_struct * structure. Wiring up the combining tree is more than can be * done with compile-time initialization, so this check is added - * to each update-side SRCU primitive. Use sp->lock, which -is- + * to each update-side SRCU primitive. Use ssp->lock, which -is- * compile-time initialized, to resolve races involving multiple * CPUs trying to garner first-use privileges. */ -static void check_init_srcu_struct(struct srcu_struct *sp) +static void check_init_srcu_struct(struct srcu_struct *ssp) { unsigned long flags; /* The smp_load_acquire() pairs with the smp_store_release(). */ - if (!rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq_needed))) /*^^^*/ + if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_gp_seq_needed))) /*^^^*/ return; /* Already initialized. 
*/ - spin_lock_irqsave_rcu_node(sp, flags); - if (!rcu_seq_state(sp->srcu_gp_seq_needed)) { - spin_unlock_irqrestore_rcu_node(sp, flags); + spin_lock_irqsave_rcu_node(ssp, flags); + if (!rcu_seq_state(ssp->srcu_gp_seq_needed)) { + spin_unlock_irqrestore_rcu_node(ssp, flags); return; } - init_srcu_struct_fields(sp, true); - spin_unlock_irqrestore_rcu_node(sp, flags); + init_srcu_struct_fields(ssp, true); + spin_unlock_irqrestore_rcu_node(ssp, flags); } /* * Returns approximate total of the readers' ->srcu_lock_count[] values * for the rank of per-CPU counters specified by idx. */ -static unsigned long srcu_readers_lock_idx(struct srcu_struct *sp, int idx) +static unsigned long srcu_readers_lock_idx(struct srcu_struct *ssp, int idx) { int cpu; unsigned long sum = 0; for_each_possible_cpu(cpu) { - struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu); + struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu); sum += READ_ONCE(cpuc->srcu_lock_count[idx]); } @@ -272,13 +272,13 @@ static unsigned long srcu_readers_lock_idx(struct srcu_struct *sp, int idx) * Returns approximate total of the readers' ->srcu_unlock_count[] values * for the rank of per-CPU counters specified by idx. */ -static unsigned long srcu_readers_unlock_idx(struct srcu_struct *sp, int idx) +static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx) { int cpu; unsigned long sum = 0; for_each_possible_cpu(cpu) { - struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu); + struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu); sum += READ_ONCE(cpuc->srcu_unlock_count[idx]); } @@ -289,11 +289,11 @@ static unsigned long srcu_readers_unlock_idx(struct srcu_struct *sp, int idx) * Return true if the number of pre-existing readers is determined to * be zero. */ -static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx) +static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx) { unsigned long unlocks; - unlocks = srcu_readers_unlock_idx(sp, idx); + unlocks = srcu_readers_unlock_idx(ssp, idx); /* * Make sure that a lock is always counted if the corresponding @@ -329,25 +329,25 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx) * of floor(ULONG_MAX/NR_CPUS/2), which should be sufficient, * especially on 64-bit systems. */ - return srcu_readers_lock_idx(sp, idx) == unlocks; + return srcu_readers_lock_idx(ssp, idx) == unlocks; } /** * srcu_readers_active - returns true if there are readers. and false * otherwise - * @sp: which srcu_struct to count active readers (holding srcu_read_lock). + * @ssp: which srcu_struct to count active readers (holding srcu_read_lock). * * Note that this is not an atomic primitive, and can therefore suffer * severe errors when invoked on an active srcu_struct. That said, it * can be useful as an error check at cleanup time. */ -static bool srcu_readers_active(struct srcu_struct *sp) +static bool srcu_readers_active(struct srcu_struct *ssp) { int cpu; unsigned long sum = 0; for_each_possible_cpu(cpu) { - struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu); + struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu); sum += READ_ONCE(cpuc->srcu_lock_count[0]); sum += READ_ONCE(cpuc->srcu_lock_count[1]); @@ -363,44 +363,44 @@ static bool srcu_readers_active(struct srcu_struct *sp) * Return grace-period delay, zero if there are expedited grace * periods pending, SRCU_INTERVAL otherwise. 
*/ -static unsigned long srcu_get_delay(struct srcu_struct *sp) +static unsigned long srcu_get_delay(struct srcu_struct *ssp) { - if (ULONG_CMP_LT(READ_ONCE(sp->srcu_gp_seq), - READ_ONCE(sp->srcu_gp_seq_needed_exp))) + if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), + READ_ONCE(ssp->srcu_gp_seq_needed_exp))) return 0; return SRCU_INTERVAL; } /* Helper for cleanup_srcu_struct() and cleanup_srcu_struct_quiesced(). */ -void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced) +void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced) { int cpu; - if (WARN_ON(!srcu_get_delay(sp))) + if (WARN_ON(!srcu_get_delay(ssp))) return; /* Just leak it! */ - if (WARN_ON(srcu_readers_active(sp))) + if (WARN_ON(srcu_readers_active(ssp))) return; /* Just leak it! */ if (quiesced) { - if (WARN_ON(delayed_work_pending(&sp->work))) + if (WARN_ON(delayed_work_pending(&ssp->work))) return; /* Just leak it! */ } else { - flush_delayed_work(&sp->work); + flush_delayed_work(&ssp->work); } for_each_possible_cpu(cpu) if (quiesced) { - if (WARN_ON(delayed_work_pending(&per_cpu_ptr(sp->sda, cpu)->work))) + if (WARN_ON(delayed_work_pending(&per_cpu_ptr(ssp->sda, cpu)->work))) return; /* Just leak it! */ } else { - flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work); + flush_delayed_work(&per_cpu_ptr(ssp->sda, cpu)->work); } - if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE) || - WARN_ON(srcu_readers_active(sp))) { + if (WARN_ON(rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) != SRCU_STATE_IDLE) || + WARN_ON(srcu_readers_active(ssp))) { pr_info("%s: Active srcu_struct %p state: %d\n", - __func__, sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq))); + __func__, ssp, rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq))); return; /* Caller forgot to stop doing call_srcu()? */ } - free_percpu(sp->sda); - sp->sda = NULL; + free_percpu(ssp->sda); + ssp->sda = NULL; } EXPORT_SYMBOL_GPL(_cleanup_srcu_struct); @@ -409,12 +409,12 @@ EXPORT_SYMBOL_GPL(_cleanup_srcu_struct); * srcu_struct. * Returns an index that must be passed to the matching srcu_read_unlock(). */ -int __srcu_read_lock(struct srcu_struct *sp) +int __srcu_read_lock(struct srcu_struct *ssp) { int idx; - idx = READ_ONCE(sp->srcu_idx) & 0x1; - this_cpu_inc(sp->sda->srcu_lock_count[idx]); + idx = READ_ONCE(ssp->srcu_idx) & 0x1; + this_cpu_inc(ssp->sda->srcu_lock_count[idx]); smp_mb(); /* B */ /* Avoid leaking the critical section. */ return idx; } @@ -425,10 +425,10 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock); * element of the srcu_struct. Note that this may well be a different * CPU than that which was incremented by the corresponding srcu_read_lock(). */ -void __srcu_read_unlock(struct srcu_struct *sp, int idx) +void __srcu_read_unlock(struct srcu_struct *ssp, int idx) { smp_mb(); /* C */ /* Avoid leaking the critical section. */ - this_cpu_inc(sp->sda->srcu_unlock_count[idx]); + this_cpu_inc(ssp->sda->srcu_unlock_count[idx]); } EXPORT_SYMBOL_GPL(__srcu_read_unlock); @@ -444,20 +444,22 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock); /* * Start an SRCU grace period. 
*/ -static void srcu_gp_start(struct srcu_struct *sp) +static void srcu_gp_start(struct srcu_struct *ssp) { - struct srcu_data *sdp = this_cpu_ptr(sp->sda); + struct srcu_data *sdp = this_cpu_ptr(ssp->sda); int state; - lockdep_assert_held(&ACCESS_PRIVATE(sp, lock)); - WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)); + lockdep_assert_held(&ACCESS_PRIVATE(ssp, lock)); + WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)); + spin_lock_rcu_node(sdp); /* Interrupts already disabled. */ rcu_segcblist_advance(&sdp->srcu_cblist, - rcu_seq_current(&sp->srcu_gp_seq)); + rcu_seq_current(&ssp->srcu_gp_seq)); (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, - rcu_seq_snap(&sp->srcu_gp_seq)); + rcu_seq_snap(&ssp->srcu_gp_seq)); + spin_unlock_rcu_node(sdp); /* Interrupts remain disabled. */ smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */ - rcu_seq_start(&sp->srcu_gp_seq); - state = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)); + rcu_seq_start(&ssp->srcu_gp_seq); + state = rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)); WARN_ON_ONCE(state != SRCU_STATE_SCAN1); } @@ -511,7 +513,7 @@ static void srcu_schedule_cbs_sdp(struct srcu_data *sdp, unsigned long delay) * just-completed grace period, the one corresponding to idx. If possible, * schedule this invocation on the corresponding CPUs. */ -static void srcu_schedule_cbs_snp(struct srcu_struct *sp, struct srcu_node *snp, +static void srcu_schedule_cbs_snp(struct srcu_struct *ssp, struct srcu_node *snp, unsigned long mask, unsigned long delay) { int cpu; @@ -519,7 +521,7 @@ static void srcu_schedule_cbs_snp(struct srcu_struct *sp, struct srcu_node *snp, for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) { if (!(mask & (1 << (cpu - snp->grplo)))) continue; - srcu_schedule_cbs_sdp(per_cpu_ptr(sp->sda, cpu), delay); + srcu_schedule_cbs_sdp(per_cpu_ptr(ssp->sda, cpu), delay); } } @@ -532,7 +534,7 @@ static void srcu_schedule_cbs_snp(struct srcu_struct *sp, struct srcu_node *snp, * are initiating callback invocation. This allows the ->srcu_have_cbs[] * array to have a finite number of elements. */ -static void srcu_gp_end(struct srcu_struct *sp) +static void srcu_gp_end(struct srcu_struct *ssp) { unsigned long cbdelay; bool cbs; @@ -546,28 +548,28 @@ static void srcu_gp_end(struct srcu_struct *sp) struct srcu_node *snp; /* Prevent more than one additional grace period. */ - mutex_lock(&sp->srcu_cb_mutex); + mutex_lock(&ssp->srcu_cb_mutex); /* End the current grace period. */ - spin_lock_irq_rcu_node(sp); - idx = rcu_seq_state(sp->srcu_gp_seq); + spin_lock_irq_rcu_node(ssp); + idx = rcu_seq_state(ssp->srcu_gp_seq); WARN_ON_ONCE(idx != SRCU_STATE_SCAN2); - cbdelay = srcu_get_delay(sp); - sp->srcu_last_gp_end = ktime_get_mono_fast_ns(); - rcu_seq_end(&sp->srcu_gp_seq); - gpseq = rcu_seq_current(&sp->srcu_gp_seq); - if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, gpseq)) - sp->srcu_gp_seq_needed_exp = gpseq; - spin_unlock_irq_rcu_node(sp); - mutex_unlock(&sp->srcu_gp_mutex); + cbdelay = srcu_get_delay(ssp); + ssp->srcu_last_gp_end = ktime_get_mono_fast_ns(); + rcu_seq_end(&ssp->srcu_gp_seq); + gpseq = rcu_seq_current(&ssp->srcu_gp_seq); + if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, gpseq)) + ssp->srcu_gp_seq_needed_exp = gpseq; + spin_unlock_irq_rcu_node(ssp); + mutex_unlock(&ssp->srcu_gp_mutex); /* A new grace period can start at this point. But only one. */ /* Initiate callback invocation as needed. 
*/ idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs); - srcu_for_each_node_breadth_first(sp, snp) { + srcu_for_each_node_breadth_first(ssp, snp) { spin_lock_irq_rcu_node(snp); cbs = false; - last_lvl = snp >= sp->level[rcu_num_lvls - 1]; + last_lvl = snp >= ssp->level[rcu_num_lvls - 1]; if (last_lvl) cbs = snp->srcu_have_cbs[idx] == gpseq; snp->srcu_have_cbs[idx] = gpseq; @@ -578,12 +580,12 @@ static void srcu_gp_end(struct srcu_struct *sp) snp->srcu_data_have_cbs[idx] = 0; spin_unlock_irq_rcu_node(snp); if (cbs) - srcu_schedule_cbs_snp(sp, snp, mask, cbdelay); + srcu_schedule_cbs_snp(ssp, snp, mask, cbdelay); /* Occasionally prevent srcu_data counter wrap. */ if (!(gpseq & counter_wrap_check) && last_lvl) for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) { - sdp = per_cpu_ptr(sp->sda, cpu); + sdp = per_cpu_ptr(ssp->sda, cpu); spin_lock_irqsave_rcu_node(sdp, flags); if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed + 100)) @@ -596,18 +598,18 @@ static void srcu_gp_end(struct srcu_struct *sp) } /* Callback initiation done, allow grace periods after next. */ - mutex_unlock(&sp->srcu_cb_mutex); + mutex_unlock(&ssp->srcu_cb_mutex); /* Start a new grace period if needed. */ - spin_lock_irq_rcu_node(sp); - gpseq = rcu_seq_current(&sp->srcu_gp_seq); + spin_lock_irq_rcu_node(ssp); + gpseq = rcu_seq_current(&ssp->srcu_gp_seq); if (!rcu_seq_state(gpseq) && - ULONG_CMP_LT(gpseq, sp->srcu_gp_seq_needed)) { - srcu_gp_start(sp); - spin_unlock_irq_rcu_node(sp); - srcu_reschedule(sp, 0); + ULONG_CMP_LT(gpseq, ssp->srcu_gp_seq_needed)) { + srcu_gp_start(ssp); + spin_unlock_irq_rcu_node(ssp); + srcu_reschedule(ssp, 0); } else { - spin_unlock_irq_rcu_node(sp); + spin_unlock_irq_rcu_node(ssp); } } @@ -618,13 +620,13 @@ static void srcu_gp_end(struct srcu_struct *sp) * but without expediting. To start a completely new grace period, * whether expedited or not, use srcu_funnel_gp_start() instead. */ -static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp, +static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp, unsigned long s) { unsigned long flags; for (; snp != NULL; snp = snp->srcu_parent) { - if (rcu_seq_done(&sp->srcu_gp_seq, s) || + if (rcu_seq_done(&ssp->srcu_gp_seq, s) || ULONG_CMP_GE(READ_ONCE(snp->srcu_gp_seq_needed_exp), s)) return; spin_lock_irqsave_rcu_node(snp, flags); @@ -635,10 +637,10 @@ static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp, WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s); spin_unlock_irqrestore_rcu_node(snp, flags); } - spin_lock_irqsave_rcu_node(sp, flags); - if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s)) - sp->srcu_gp_seq_needed_exp = s; - spin_unlock_irqrestore_rcu_node(sp, flags); + spin_lock_irqsave_rcu_node(ssp, flags); + if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, s)) + ssp->srcu_gp_seq_needed_exp = s; + spin_unlock_irqrestore_rcu_node(ssp, flags); } /* @@ -651,7 +653,7 @@ static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp, * Note that this function also does the work of srcu_funnel_exp_start(), * in some cases by directly invoking it. */ -static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp, +static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp, unsigned long s, bool do_norm) { unsigned long flags; @@ -661,7 +663,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp, /* Each pass through the loop does one level of the srcu_node tree. 
*/ for (; snp != NULL; snp = snp->srcu_parent) { - if (rcu_seq_done(&sp->srcu_gp_seq, s) && snp != sdp->mynode) + if (rcu_seq_done(&ssp->srcu_gp_seq, s) && snp != sdp->mynode) return; /* GP already done and CBs recorded. */ spin_lock_irqsave_rcu_node(snp, flags); if (ULONG_CMP_GE(snp->srcu_have_cbs[idx], s)) { @@ -676,7 +678,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp, return; } if (!do_norm) - srcu_funnel_exp_start(sp, snp, s); + srcu_funnel_exp_start(ssp, snp, s); return; } snp->srcu_have_cbs[idx] = s; @@ -688,29 +690,29 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp, } /* Top of tree, must ensure the grace period will be started. */ - spin_lock_irqsave_rcu_node(sp, flags); - if (ULONG_CMP_LT(sp->srcu_gp_seq_needed, s)) { + spin_lock_irqsave_rcu_node(ssp, flags); + if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed, s)) { /* * Record need for grace period s. Pair with load * acquire setting up for initialization. */ - smp_store_release(&sp->srcu_gp_seq_needed, s); /*^^^*/ + smp_store_release(&ssp->srcu_gp_seq_needed, s); /*^^^*/ } - if (!do_norm && ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s)) - sp->srcu_gp_seq_needed_exp = s; + if (!do_norm && ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, s)) + ssp->srcu_gp_seq_needed_exp = s; /* If grace period not already done and none in progress, start it. */ - if (!rcu_seq_done(&sp->srcu_gp_seq, s) && - rcu_seq_state(sp->srcu_gp_seq) == SRCU_STATE_IDLE) { - WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)); - srcu_gp_start(sp); + if (!rcu_seq_done(&ssp->srcu_gp_seq, s) && + rcu_seq_state(ssp->srcu_gp_seq) == SRCU_STATE_IDLE) { + WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)); + srcu_gp_start(ssp); if (likely(srcu_init_done)) - queue_delayed_work(rcu_gp_wq, &sp->work, - srcu_get_delay(sp)); - else if (list_empty(&sp->work.work.entry)) - list_add(&sp->work.work.entry, &srcu_boot_list); + queue_delayed_work(rcu_gp_wq, &ssp->work, + srcu_get_delay(ssp)); + else if (list_empty(&ssp->work.work.entry)) + list_add(&ssp->work.work.entry, &srcu_boot_list); } - spin_unlock_irqrestore_rcu_node(sp, flags); + spin_unlock_irqrestore_rcu_node(ssp, flags); } /* @@ -718,12 +720,12 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp, * loop an additional time if there is an expedited grace period pending. * The caller must ensure that ->srcu_idx is not changed while checking. */ -static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount) +static bool try_check_zero(struct srcu_struct *ssp, int idx, int trycount) { for (;;) { - if (srcu_readers_active_idx_check(sp, idx)) + if (srcu_readers_active_idx_check(ssp, idx)) return true; - if (--trycount + !srcu_get_delay(sp) <= 0) + if (--trycount + !srcu_get_delay(ssp) <= 0) return false; udelay(SRCU_RETRY_CHECK_DELAY); } @@ -734,7 +736,7 @@ static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount) * use the other rank of the ->srcu_(un)lock_count[] arrays. This allows * us to wait for pre-existing readers in a starvation-free manner. */ -static void srcu_flip(struct srcu_struct *sp) +static void srcu_flip(struct srcu_struct *ssp) { /* * Ensure that if this updater saw a given reader's increment @@ -746,7 +748,7 @@ static void srcu_flip(struct srcu_struct *sp) */ smp_mb(); /* E */ /* Pairs with B and C. 
*/ - WRITE_ONCE(sp->srcu_idx, sp->srcu_idx + 1); + WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1); /* * Ensure that if the updater misses an __srcu_read_unlock() @@ -779,7 +781,7 @@ static void srcu_flip(struct srcu_struct *sp) * negligible when amoritized over that time period, and the extra latency * of a needlessly non-expedited grace period is similarly negligible. */ -static bool srcu_might_be_idle(struct srcu_struct *sp) +static bool srcu_might_be_idle(struct srcu_struct *ssp) { unsigned long curseq; unsigned long flags; @@ -788,7 +790,7 @@ static bool srcu_might_be_idle(struct srcu_struct *sp) /* If the local srcu_data structure has callbacks, not idle. */ local_irq_save(flags); - sdp = this_cpu_ptr(sp->sda); + sdp = this_cpu_ptr(ssp->sda); if (rcu_segcblist_pend_cbs(&sdp->srcu_cblist)) { local_irq_restore(flags); return false; /* Callbacks already present, so not idle. */ @@ -804,17 +806,17 @@ static bool srcu_might_be_idle(struct srcu_struct *sp) /* First, see if enough time has passed since the last GP. */ t = ktime_get_mono_fast_ns(); if (exp_holdoff == 0 || - time_in_range_open(t, sp->srcu_last_gp_end, - sp->srcu_last_gp_end + exp_holdoff)) + time_in_range_open(t, ssp->srcu_last_gp_end, + ssp->srcu_last_gp_end + exp_holdoff)) return false; /* Too soon after last GP. */ /* Next, check for probable idleness. */ - curseq = rcu_seq_current(&sp->srcu_gp_seq); + curseq = rcu_seq_current(&ssp->srcu_gp_seq); smp_mb(); /* Order ->srcu_gp_seq with ->srcu_gp_seq_needed. */ - if (ULONG_CMP_LT(curseq, READ_ONCE(sp->srcu_gp_seq_needed))) + if (ULONG_CMP_LT(curseq, READ_ONCE(ssp->srcu_gp_seq_needed))) return false; /* Grace period in progress, so not idle. */ smp_mb(); /* Order ->srcu_gp_seq with prior access. */ - if (curseq != rcu_seq_current(&sp->srcu_gp_seq)) + if (curseq != rcu_seq_current(&ssp->srcu_gp_seq)) return false; /* GP # changed, so not idle. */ return true; /* With reasonable probability, idle! */ } @@ -854,16 +856,17 @@ static void srcu_leak_callback(struct rcu_head *rhp) * srcu_read_lock(), and srcu_read_unlock() that are all passed the same * srcu_struct structure. */ -void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, +void __call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp, rcu_callback_t func, bool do_norm) { unsigned long flags; + int idx; bool needexp = false; bool needgp = false; unsigned long s; struct srcu_data *sdp; - check_init_srcu_struct(sp); + check_init_srcu_struct(ssp); if (debug_rcu_head_queue(rhp)) { /* Probable double call_srcu(), so leak the callback. 
*/ WRITE_ONCE(rhp->func, srcu_leak_callback); @@ -871,13 +874,14 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, return; } rhp->func = func; + idx = srcu_read_lock(ssp); local_irq_save(flags); - sdp = this_cpu_ptr(sp->sda); + sdp = this_cpu_ptr(ssp->sda); spin_lock_rcu_node(sdp); rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp, false); rcu_segcblist_advance(&sdp->srcu_cblist, - rcu_seq_current(&sp->srcu_gp_seq)); - s = rcu_seq_snap(&sp->srcu_gp_seq); + rcu_seq_current(&ssp->srcu_gp_seq)); + s = rcu_seq_snap(&ssp->srcu_gp_seq); (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, s); if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) { sdp->srcu_gp_seq_needed = s; @@ -889,14 +893,15 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, } spin_unlock_irqrestore_rcu_node(sdp, flags); if (needgp) - srcu_funnel_gp_start(sp, sdp, s, do_norm); + srcu_funnel_gp_start(ssp, sdp, s, do_norm); else if (needexp) - srcu_funnel_exp_start(sp, sdp->mynode, s); + srcu_funnel_exp_start(ssp, sdp->mynode, s); + srcu_read_unlock(ssp, idx); } /** * call_srcu() - Queue a callback for invocation after an SRCU grace period - * @sp: srcu_struct in queue the callback + * @ssp: srcu_struct in queue the callback * @rhp: structure to be used for queueing the SRCU callback. * @func: function to be invoked after the SRCU grace period * @@ -911,21 +916,21 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, * The callback will be invoked from process context, but must nevertheless * be fast and must not block. */ -void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, +void call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp, rcu_callback_t func) { - __call_srcu(sp, rhp, func, true); + __call_srcu(ssp, rhp, func, true); } EXPORT_SYMBOL_GPL(call_srcu); /* * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). */ -static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm) +static void __synchronize_srcu(struct srcu_struct *ssp, bool do_norm) { struct rcu_synchronize rcu; - RCU_LOCKDEP_WARN(lock_is_held(&sp->dep_map) || + RCU_LOCKDEP_WARN(lock_is_held(&ssp->dep_map) || lock_is_held(&rcu_bh_lock_map) || lock_is_held(&rcu_lock_map) || lock_is_held(&rcu_sched_lock_map), @@ -934,10 +939,10 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm) if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE) return; might_sleep(); - check_init_srcu_struct(sp); + check_init_srcu_struct(ssp); init_completion(&rcu.completion); init_rcu_head_on_stack(&rcu.head); - __call_srcu(sp, &rcu.head, wakeme_after_rcu, do_norm); + __call_srcu(ssp, &rcu.head, wakeme_after_rcu, do_norm); wait_for_completion(&rcu.completion); destroy_rcu_head_on_stack(&rcu.head); @@ -953,7 +958,7 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm) /** * synchronize_srcu_expedited - Brute-force SRCU grace period - * @sp: srcu_struct with which to synchronize. + * @ssp: srcu_struct with which to synchronize. * * Wait for an SRCU grace period to elapse, but be more aggressive about * spinning rather than blocking when waiting. @@ -961,15 +966,15 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm) * Note that synchronize_srcu_expedited() has the same deadlock and * memory-ordering properties as does synchronize_srcu(). 
*/ -void synchronize_srcu_expedited(struct srcu_struct *sp) +void synchronize_srcu_expedited(struct srcu_struct *ssp) { - __synchronize_srcu(sp, rcu_gp_is_normal()); + __synchronize_srcu(ssp, rcu_gp_is_normal()); } EXPORT_SYMBOL_GPL(synchronize_srcu_expedited); /** * synchronize_srcu - wait for prior SRCU read-side critical-section completion - * @sp: srcu_struct with which to synchronize. + * @ssp: srcu_struct with which to synchronize. * * Wait for the count to drain to zero of both indexes. To avoid the * possible starvation of synchronize_srcu(), it waits for the count of @@ -1011,12 +1016,12 @@ EXPORT_SYMBOL_GPL(synchronize_srcu_expedited); * SRCU must also provide it. Note that detecting idleness is heuristic * and subject to both false positives and negatives. */ -void synchronize_srcu(struct srcu_struct *sp) +void synchronize_srcu(struct srcu_struct *ssp) { - if (srcu_might_be_idle(sp) || rcu_gp_is_expedited()) - synchronize_srcu_expedited(sp); + if (srcu_might_be_idle(ssp) || rcu_gp_is_expedited()) + synchronize_srcu_expedited(ssp); else - __synchronize_srcu(sp, true); + __synchronize_srcu(ssp, true); } EXPORT_SYMBOL_GPL(synchronize_srcu); @@ -1026,36 +1031,36 @@ EXPORT_SYMBOL_GPL(synchronize_srcu); static void srcu_barrier_cb(struct rcu_head *rhp) { struct srcu_data *sdp; - struct srcu_struct *sp; + struct srcu_struct *ssp; sdp = container_of(rhp, struct srcu_data, srcu_barrier_head); - sp = sdp->sp; - if (atomic_dec_and_test(&sp->srcu_barrier_cpu_cnt)) - complete(&sp->srcu_barrier_completion); + ssp = sdp->ssp; + if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt)) + complete(&ssp->srcu_barrier_completion); } /** * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete. - * @sp: srcu_struct on which to wait for in-flight callbacks. + * @ssp: srcu_struct on which to wait for in-flight callbacks. */ -void srcu_barrier(struct srcu_struct *sp) +void srcu_barrier(struct srcu_struct *ssp) { int cpu; struct srcu_data *sdp; - unsigned long s = rcu_seq_snap(&sp->srcu_barrier_seq); + unsigned long s = rcu_seq_snap(&ssp->srcu_barrier_seq); - check_init_srcu_struct(sp); - mutex_lock(&sp->srcu_barrier_mutex); - if (rcu_seq_done(&sp->srcu_barrier_seq, s)) { + check_init_srcu_struct(ssp); + mutex_lock(&ssp->srcu_barrier_mutex); + if (rcu_seq_done(&ssp->srcu_barrier_seq, s)) { smp_mb(); /* Force ordering following return. */ - mutex_unlock(&sp->srcu_barrier_mutex); + mutex_unlock(&ssp->srcu_barrier_mutex); return; /* Someone else did our work for us. */ } - rcu_seq_start(&sp->srcu_barrier_seq); - init_completion(&sp->srcu_barrier_completion); + rcu_seq_start(&ssp->srcu_barrier_seq); + init_completion(&ssp->srcu_barrier_completion); /* Initial count prevents reaching zero until all CBs are posted. */ - atomic_set(&sp->srcu_barrier_cpu_cnt, 1); + atomic_set(&ssp->srcu_barrier_cpu_cnt, 1); /* * Each pass through this loop enqueues a callback, but only @@ -1066,39 +1071,39 @@ void srcu_barrier(struct srcu_struct *sp) * grace period as the last callback already in the queue. 
*/ for_each_possible_cpu(cpu) { - sdp = per_cpu_ptr(sp->sda, cpu); + sdp = per_cpu_ptr(ssp->sda, cpu); spin_lock_irq_rcu_node(sdp); - atomic_inc(&sp->srcu_barrier_cpu_cnt); + atomic_inc(&ssp->srcu_barrier_cpu_cnt); sdp->srcu_barrier_head.func = srcu_barrier_cb; debug_rcu_head_queue(&sdp->srcu_barrier_head); if (!rcu_segcblist_entrain(&sdp->srcu_cblist, &sdp->srcu_barrier_head, 0)) { debug_rcu_head_unqueue(&sdp->srcu_barrier_head); - atomic_dec(&sp->srcu_barrier_cpu_cnt); + atomic_dec(&ssp->srcu_barrier_cpu_cnt); } spin_unlock_irq_rcu_node(sdp); } /* Remove the initial count, at which point reaching zero can happen. */ - if (atomic_dec_and_test(&sp->srcu_barrier_cpu_cnt)) - complete(&sp->srcu_barrier_completion); - wait_for_completion(&sp->srcu_barrier_completion); + if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt)) + complete(&ssp->srcu_barrier_completion); + wait_for_completion(&ssp->srcu_barrier_completion); - rcu_seq_end(&sp->srcu_barrier_seq); - mutex_unlock(&sp->srcu_barrier_mutex); + rcu_seq_end(&ssp->srcu_barrier_seq); + mutex_unlock(&ssp->srcu_barrier_mutex); } EXPORT_SYMBOL_GPL(srcu_barrier); /** * srcu_batches_completed - return batches completed. - * @sp: srcu_struct on which to report batch completion. + * @ssp: srcu_struct on which to report batch completion. * * Report the number of batches, correlated with, but not necessarily * precisely the same as, the number of grace periods that have elapsed. */ -unsigned long srcu_batches_completed(struct srcu_struct *sp) +unsigned long srcu_batches_completed(struct srcu_struct *ssp) { - return sp->srcu_idx; + return ssp->srcu_idx; } EXPORT_SYMBOL_GPL(srcu_batches_completed); @@ -1107,11 +1112,11 @@ EXPORT_SYMBOL_GPL(srcu_batches_completed); * to SRCU_STATE_SCAN2, and invoke srcu_gp_end() when scan has * completed in that state. */ -static void srcu_advance_state(struct srcu_struct *sp) +static void srcu_advance_state(struct srcu_struct *ssp) { int idx; - mutex_lock(&sp->srcu_gp_mutex); + mutex_lock(&ssp->srcu_gp_mutex); /* * Because readers might be delayed for an extended period after @@ -1123,47 +1128,47 @@ static void srcu_advance_state(struct srcu_struct *sp) * The load-acquire ensures that we see the accesses performed * by the prior grace period. */ - idx = rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq)); /* ^^^ */ + idx = rcu_seq_state(smp_load_acquire(&ssp->srcu_gp_seq)); /* ^^^ */ if (idx == SRCU_STATE_IDLE) { - spin_lock_irq_rcu_node(sp); - if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) { - WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq)); - spin_unlock_irq_rcu_node(sp); - mutex_unlock(&sp->srcu_gp_mutex); + spin_lock_irq_rcu_node(ssp); + if (ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)) { + WARN_ON_ONCE(rcu_seq_state(ssp->srcu_gp_seq)); + spin_unlock_irq_rcu_node(ssp); + mutex_unlock(&ssp->srcu_gp_mutex); return; } - idx = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)); + idx = rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)); if (idx == SRCU_STATE_IDLE) - srcu_gp_start(sp); - spin_unlock_irq_rcu_node(sp); + srcu_gp_start(ssp); + spin_unlock_irq_rcu_node(ssp); if (idx != SRCU_STATE_IDLE) { - mutex_unlock(&sp->srcu_gp_mutex); + mutex_unlock(&ssp->srcu_gp_mutex); return; /* Someone else started the grace period. 
*/ } } - if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN1) { - idx = 1 ^ (sp->srcu_idx & 1); - if (!try_check_zero(sp, idx, 1)) { - mutex_unlock(&sp->srcu_gp_mutex); + if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) == SRCU_STATE_SCAN1) { + idx = 1 ^ (ssp->srcu_idx & 1); + if (!try_check_zero(ssp, idx, 1)) { + mutex_unlock(&ssp->srcu_gp_mutex); return; /* readers present, retry later. */ } - srcu_flip(sp); - rcu_seq_set_state(&sp->srcu_gp_seq, SRCU_STATE_SCAN2); + srcu_flip(ssp); + rcu_seq_set_state(&ssp->srcu_gp_seq, SRCU_STATE_SCAN2); } - if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN2) { + if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) == SRCU_STATE_SCAN2) { /* * SRCU read-side critical sections are normally short, * so check at least twice in quick succession after a flip. */ - idx = 1 ^ (sp->srcu_idx & 1); - if (!try_check_zero(sp, idx, 2)) { - mutex_unlock(&sp->srcu_gp_mutex); + idx = 1 ^ (ssp->srcu_idx & 1); + if (!try_check_zero(ssp, idx, 2)) { + mutex_unlock(&ssp->srcu_gp_mutex); return; /* readers present, retry later. */ } - srcu_gp_end(sp); /* Releases ->srcu_gp_mutex. */ + srcu_gp_end(ssp); /* Releases ->srcu_gp_mutex. */ } } @@ -1179,14 +1184,14 @@ static void srcu_invoke_callbacks(struct work_struct *work) struct rcu_cblist ready_cbs; struct rcu_head *rhp; struct srcu_data *sdp; - struct srcu_struct *sp; + struct srcu_struct *ssp; sdp = container_of(work, struct srcu_data, work.work); - sp = sdp->sp; + ssp = sdp->ssp; rcu_cblist_init(&ready_cbs); spin_lock_irq_rcu_node(sdp); rcu_segcblist_advance(&sdp->srcu_cblist, - rcu_seq_current(&sp->srcu_gp_seq)); + rcu_seq_current(&ssp->srcu_gp_seq)); if (sdp->srcu_cblist_invoking || !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) { spin_unlock_irq_rcu_node(sdp); @@ -1212,7 +1217,7 @@ static void srcu_invoke_callbacks(struct work_struct *work) spin_lock_irq_rcu_node(sdp); rcu_segcblist_insert_count(&sdp->srcu_cblist, &ready_cbs); (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, - rcu_seq_snap(&sp->srcu_gp_seq)); + rcu_seq_snap(&ssp->srcu_gp_seq)); sdp->srcu_cblist_invoking = false; more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist); spin_unlock_irq_rcu_node(sdp); @@ -1224,24 +1229,24 @@ static void srcu_invoke_callbacks(struct work_struct *work) * Finished one round of SRCU grace period. Start another if there are * more SRCU callbacks queued, otherwise put SRCU into not-running state. */ -static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay) +static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay) { bool pushgp = true; - spin_lock_irq_rcu_node(sp); - if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) { - if (!WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq))) { + spin_lock_irq_rcu_node(ssp); + if (ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)) { + if (!WARN_ON_ONCE(rcu_seq_state(ssp->srcu_gp_seq))) { /* All requests fulfilled, time to go idle. */ pushgp = false; } - } else if (!rcu_seq_state(sp->srcu_gp_seq)) { + } else if (!rcu_seq_state(ssp->srcu_gp_seq)) { /* Outstanding request and no GP. Start one. 
*/ - srcu_gp_start(sp); + srcu_gp_start(ssp); } - spin_unlock_irq_rcu_node(sp); + spin_unlock_irq_rcu_node(ssp); if (pushgp) - queue_delayed_work(rcu_gp_wq, &sp->work, delay); + queue_delayed_work(rcu_gp_wq, &ssp->work, delay); } /* @@ -1249,41 +1254,41 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay) */ static void process_srcu(struct work_struct *work) { - struct srcu_struct *sp; + struct srcu_struct *ssp; - sp = container_of(work, struct srcu_struct, work.work); + ssp = container_of(work, struct srcu_struct, work.work); - srcu_advance_state(sp); - srcu_reschedule(sp, srcu_get_delay(sp)); + srcu_advance_state(ssp); + srcu_reschedule(ssp, srcu_get_delay(ssp)); } void srcutorture_get_gp_data(enum rcutorture_type test_type, - struct srcu_struct *sp, int *flags, + struct srcu_struct *ssp, int *flags, unsigned long *gp_seq) { if (test_type != SRCU_FLAVOR) return; *flags = 0; - *gp_seq = rcu_seq_current(&sp->srcu_gp_seq); + *gp_seq = rcu_seq_current(&ssp->srcu_gp_seq); } EXPORT_SYMBOL_GPL(srcutorture_get_gp_data); -void srcu_torture_stats_print(struct srcu_struct *sp, char *tt, char *tf) +void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf) { int cpu; int idx; unsigned long s0 = 0, s1 = 0; - idx = sp->srcu_idx & 0x1; + idx = ssp->srcu_idx & 0x1; pr_alert("%s%s Tree SRCU g%ld per-CPU(idx=%d):", - tt, tf, rcu_seq_current(&sp->srcu_gp_seq), idx); + tt, tf, rcu_seq_current(&ssp->srcu_gp_seq), idx); for_each_possible_cpu(cpu) { unsigned long l0, l1; unsigned long u0, u1; long c0, c1; struct srcu_data *sdp; - sdp = per_cpu_ptr(sp->sda, cpu); + sdp = per_cpu_ptr(ssp->sda, cpu); u0 = sdp->srcu_unlock_count[!idx]; u1 = sdp->srcu_unlock_count[idx]; @@ -1318,14 +1323,14 @@ early_initcall(srcu_bootup_announce); void __init srcu_init(void) { - struct srcu_struct *sp; + struct srcu_struct *ssp; srcu_init_done = true; while (!list_empty(&srcu_boot_list)) { - sp = list_first_entry(&srcu_boot_list, struct srcu_struct, + ssp = list_first_entry(&srcu_boot_list, struct srcu_struct, work.work.entry); - check_init_srcu_struct(sp); - list_del_init(&sp->work.work.entry); - queue_work(rcu_gp_wq, &sp->work.work); + check_init_srcu_struct(ssp); + list_del_init(&ssp->work.work.entry); + queue_work(rcu_gp_wq, &ssp->work.work); } } diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c index 3f943efcf61c..be10036fa621 100644 --- a/kernel/rcu/sync.c +++ b/kernel/rcu/sync.c @@ -44,15 +44,15 @@ static const struct { __INIT_HELD(rcu_read_lock_held) }, [RCU_SCHED_SYNC] = { - .sync = synchronize_sched, - .call = call_rcu_sched, - .wait = rcu_barrier_sched, + .sync = synchronize_rcu, + .call = call_rcu, + .wait = rcu_barrier, __INIT_HELD(rcu_read_lock_sched_held) }, [RCU_BH_SYNC] = { - .sync = synchronize_rcu_bh, - .call = call_rcu_bh, - .wait = rcu_barrier_bh, + .sync = synchronize_rcu, + .call = call_rcu, + .wait = rcu_barrier, __INIT_HELD(rcu_read_lock_bh_held) }, }; @@ -125,8 +125,7 @@ void rcu_sync_enter(struct rcu_sync *rsp) rsp->gp_state = GP_PENDING; spin_unlock_irq(&rsp->rss_lock); - BUG_ON(need_wait && need_sync); - + WARN_ON_ONCE(need_wait && need_sync); if (need_sync) { gp_ops[rsp->gp_type].sync(); rsp->gp_state = GP_PASSED; @@ -139,7 +138,7 @@ void rcu_sync_enter(struct rcu_sync *rsp) * Nobody has yet been allowed the 'fast' path and thus we can * avoid doing any sync(). The callback will get 'dropped'. 
*/ - BUG_ON(rsp->gp_state != GP_PASSED); + WARN_ON_ONCE(rsp->gp_state != GP_PASSED); } } @@ -166,8 +165,8 @@ static void rcu_sync_func(struct rcu_head *rhp) struct rcu_sync *rsp = container_of(rhp, struct rcu_sync, cb_head); unsigned long flags; - BUG_ON(rsp->gp_state != GP_PASSED); - BUG_ON(rsp->cb_state == CB_IDLE); + WARN_ON_ONCE(rsp->gp_state != GP_PASSED); + WARN_ON_ONCE(rsp->cb_state == CB_IDLE); spin_lock_irqsave(&rsp->rss_lock, flags); if (rsp->gp_count) { @@ -225,7 +224,7 @@ void rcu_sync_dtor(struct rcu_sync *rsp) { int cb_state; - BUG_ON(rsp->gp_count); + WARN_ON_ONCE(rsp->gp_count); spin_lock_irq(&rsp->rss_lock); if (rsp->cb_state == CB_REPLAY) @@ -235,6 +234,6 @@ void rcu_sync_dtor(struct rcu_sync *rsp) if (cb_state != CB_IDLE) { gp_ops[rsp->gp_type].wait(); - BUG_ON(rsp->cb_state != CB_IDLE); + WARN_ON_ONCE(rsp->cb_state != CB_IDLE); } } diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 121f833acd04..6ec3abbe90e2 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -500,16 +500,29 @@ void rcu_force_quiescent_state(void) EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); /* + * Convert a ->gp_state value to a character string. + */ +static const char *gp_state_getname(short gs) +{ + if (gs < 0 || gs >= ARRAY_SIZE(gp_state_names)) + return "???"; + return gp_state_names[gs]; +} + +/* * Show the state of the grace-period kthreads. */ void show_rcu_gp_kthreads(void) { int cpu; + unsigned long j; struct rcu_data *rdp; struct rcu_node *rnp; - pr_info("%s: wait state: %d ->state: %#lx\n", rcu_state.name, - rcu_state.gp_state, rcu_state.gp_kthread->state); + j = jiffies - READ_ONCE(rcu_state.gp_activity); + pr_info("%s: wait state: %s(%d) ->state: %#lx delta ->gp_activity %ld\n", + rcu_state.name, gp_state_getname(rcu_state.gp_state), + rcu_state.gp_state, rcu_state.gp_kthread->state, j); rcu_for_each_node_breadth_first(rnp) { if (ULONG_CMP_GE(rcu_state.gp_seq, rnp->gp_seq_needed)) continue; @@ -891,12 +904,12 @@ void rcu_irq_enter_irqson(void) } /** - * rcu_is_watching - see if RCU thinks that the current CPU is idle + * rcu_is_watching - see if RCU thinks that the current CPU is not idle * * Return true if RCU is watching the running CPU, which means that this * CPU can safely enter RCU read-side critical sections. In other words, - * if the current CPU is in its idle loop and is neither in an interrupt - * or NMI handler, return true. + * if the current CPU is not in its idle loop or is in an interrupt or + * NMI handler, return true. */ bool notrace rcu_is_watching(void) { @@ -1143,16 +1156,6 @@ static void record_gp_stall_check_time(void) } /* - * Convert a ->gp_state value to a character string. - */ -static const char *gp_state_getname(short gs) -{ - if (gs < 0 || gs >= ARRAY_SIZE(gp_state_names)) - return "???"; - return gp_state_names[gs]; -} - -/* * Complain about starvation of grace-period kthread. */ static void rcu_check_gp_kthread_starvation(void) @@ -2032,9 +2035,9 @@ static void rcu_gp_cleanup(void) rnp = rcu_get_root(); raw_spin_lock_irq_rcu_node(rnp); /* GP before ->gp_seq update. */ - /* Declare grace period done. */ - rcu_seq_end(&rcu_state.gp_seq); + /* Declare grace period done, trace first to use old GP number. */ trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("end")); + rcu_seq_end(&rcu_state.gp_seq); rcu_state.gp_state = RCU_GP_IDLE; /* Check for GP requests since above loop. 
*/ rdp = this_cpu_ptr(&rcu_data); @@ -2600,10 +2603,10 @@ static void force_quiescent_state(void) * This function checks for grace-period requests that fail to motivate * RCU to come out of its idle mode. */ -static void -rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp) +void +rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp, + const unsigned long gpssdelay) { - const unsigned long gpssdelay = rcu_jiffies_till_stall_check() * HZ; unsigned long flags; unsigned long j; struct rcu_node *rnp_root = rcu_get_root(); @@ -2690,7 +2693,7 @@ static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused local_irq_restore(flags); } - rcu_check_gp_start_stall(rnp, rdp); + rcu_check_gp_start_stall(rnp, rdp, rcu_jiffies_till_stall_check()); /* If there are callbacks ready, invoke them. */ if (rcu_segcblist_ready_cbs(&rdp->cblist)) @@ -2826,7 +2829,7 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func, int cpu, bool lazy) * Very early boot, before rcu_init(). Initialize if needed * and then drop through to queue the callback. */ - BUG_ON(cpu != -1); + WARN_ON_ONCE(cpu != -1); WARN_ON_ONCE(!rcu_is_watching()); if (rcu_segcblist_empty(&rdp->cblist)) rcu_segcblist_init(&rdp->cblist); @@ -3485,7 +3488,8 @@ static int __init rcu_spawn_gp_kthread(void) rcu_scheduler_fully_active = 1; t = kthread_create(rcu_gp_kthread, NULL, "%s", rcu_state.name); - BUG_ON(IS_ERR(t)); + if (WARN_ONCE(IS_ERR(t), "%s: Could not start grace-period kthread, OOM is now expected behavior\n", __func__)) + return 0; rnp = rcu_get_root(); raw_spin_lock_irqsave_rcu_node(rnp, flags); rcu_state.gp_kthread = t; diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 703e19ff532d..c3e2807a834a 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -57,7 +57,7 @@ struct rcu_node { /* some rcu_state fields as well as */ /* following. */ unsigned long gp_seq; /* Track rsp->rcu_gp_seq. */ - unsigned long gp_seq_needed; /* Track rsp->rcu_gp_seq_needed. */ + unsigned long gp_seq_needed; /* Track furthest future GP request. */ unsigned long completedqs; /* All QSes done for this node. */ unsigned long qsmask; /* CPUs or groups that need to switch in */ /* order for current grace period to proceed.*/ @@ -163,7 +163,7 @@ union rcu_noqs { struct rcu_data { /* 1) quiescent-state and grace-period handling : */ unsigned long gp_seq; /* Track rsp->rcu_gp_seq counter. */ - unsigned long gp_seq_needed; /* Track rsp->rcu_gp_seq_needed ctr. */ + unsigned long gp_seq_needed; /* Track furthest future GP request. */ union rcu_noqs cpu_no_qs; /* No QSes yet for this CPU. */ bool core_needs_qs; /* Core waits for quiesc state. */ bool beenonline; /* CPU online at least once. 
*/ @@ -398,17 +398,6 @@ static const char *tp_rcu_varname __used __tracepoint_string = rcu_name; #define RCU_NAME rcu_name #endif /* #else #ifdef CONFIG_TRACING */ -/* - * RCU implementation internal declarations: - */ -extern struct rcu_state rcu_sched_state; - -extern struct rcu_state rcu_bh_state; - -#ifdef CONFIG_PREEMPT_RCU -extern struct rcu_state rcu_preempt_state; -#endif /* #ifdef CONFIG_PREEMPT_RCU */ - int rcu_dynticks_snap(struct rcu_data *rdp); #ifdef CONFIG_RCU_BOOST diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 8d18c1014e2b..928fe5893a57 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -450,10 +450,12 @@ static void sync_rcu_exp_select_cpus(smp_call_func_t func) } INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus); preempt_disable(); - cpu = cpumask_next(rnp->grplo - 1, cpu_online_mask); + cpu = find_next_bit(&rnp->ffmask, BITS_PER_LONG, -1); /* If all offline, queue the work on an unbound CPU. */ - if (unlikely(cpu > rnp->grphi)) + if (unlikely(cpu > rnp->grphi - rnp->grplo)) cpu = WORK_CPU_UNBOUND; + else + cpu += rnp->grplo; queue_work_on(cpu, rcu_par_gp_wq, &rnp->rew.rew_work); preempt_enable(); rnp->exp_need_flush = true; @@ -690,8 +692,10 @@ static void sync_rcu_exp_handler(void *unused) */ if (t->rcu_read_lock_nesting > 0) { raw_spin_lock_irqsave_rcu_node(rnp, flags); - if (rnp->expmask & rdp->grpmask) + if (rnp->expmask & rdp->grpmask) { rdp->deferred_qs = true; + WRITE_ONCE(t->rcu_read_unlock_special.b.exp_hint, true); + } raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 05915e536336..605ff3b06098 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -397,6 +397,11 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) return rnp->gp_tasks != NULL; } +/* Bias and limit values for ->rcu_read_lock_nesting. */ +#define RCU_NEST_BIAS INT_MAX +#define RCU_NEST_NMAX (-INT_MAX / 2) +#define RCU_NEST_PMAX (INT_MAX / 2) + /* * Preemptible RCU implementation for rcu_read_lock(). * Just increment ->rcu_read_lock_nesting, shared state will be updated @@ -405,6 +410,8 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) void __rcu_read_lock(void) { current->rcu_read_lock_nesting++; + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) + WARN_ON_ONCE(current->rcu_read_lock_nesting > RCU_NEST_PMAX); barrier(); /* critical section after entry code. */ } EXPORT_SYMBOL_GPL(__rcu_read_lock); @@ -424,20 +431,18 @@ void __rcu_read_unlock(void) --t->rcu_read_lock_nesting; } else { barrier(); /* critical section before exit code. 
*/ - t->rcu_read_lock_nesting = INT_MIN; + t->rcu_read_lock_nesting = -RCU_NEST_BIAS; barrier(); /* assign before ->rcu_read_unlock_special load */ if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s))) rcu_read_unlock_special(t); barrier(); /* ->rcu_read_unlock_special load before assign */ t->rcu_read_lock_nesting = 0; } -#ifdef CONFIG_PROVE_LOCKING - { - int rrln = READ_ONCE(t->rcu_read_lock_nesting); + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { + int rrln = t->rcu_read_lock_nesting; - WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2); + WARN_ON_ONCE(rrln < 0 && rrln > RCU_NEST_NMAX); } -#endif /* #ifdef CONFIG_PROVE_LOCKING */ } EXPORT_SYMBOL_GPL(__rcu_read_unlock); @@ -597,7 +602,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags) */ static bool rcu_preempt_need_deferred_qs(struct task_struct *t) { - return (this_cpu_ptr(&rcu_data)->deferred_qs || + return (__this_cpu_read(rcu_data.deferred_qs) || READ_ONCE(t->rcu_read_unlock_special.s)) && t->rcu_read_lock_nesting <= 0; } @@ -617,11 +622,11 @@ static void rcu_preempt_deferred_qs(struct task_struct *t) if (!rcu_preempt_need_deferred_qs(t)) return; if (couldrecurse) - t->rcu_read_lock_nesting -= INT_MIN; + t->rcu_read_lock_nesting -= RCU_NEST_BIAS; local_irq_save(flags); rcu_preempt_deferred_qs_irqrestore(t, flags); if (couldrecurse) - t->rcu_read_lock_nesting += INT_MIN; + t->rcu_read_lock_nesting += RCU_NEST_BIAS; } /* @@ -642,13 +647,21 @@ static void rcu_read_unlock_special(struct task_struct *t) local_irq_save(flags); irqs_were_disabled = irqs_disabled_flags(flags); - if ((preempt_bh_were_disabled || irqs_were_disabled) && - t->rcu_read_unlock_special.b.blocked) { + if (preempt_bh_were_disabled || irqs_were_disabled) { + WRITE_ONCE(t->rcu_read_unlock_special.b.exp_hint, false); /* Need to defer quiescent state until everything is enabled. */ - raise_softirq_irqoff(RCU_SOFTIRQ); + if (irqs_were_disabled) { + /* Enabling irqs does not reschedule, so... */ + raise_softirq_irqoff(RCU_SOFTIRQ); + } else { + /* Enabling BH or preempt does reschedule, so... */ + set_tsk_need_resched(current); + set_preempt_need_resched(); + } local_irq_restore(flags); return; } + WRITE_ONCE(t->rcu_read_unlock_special.b.exp_hint, false); rcu_preempt_deferred_qs_irqrestore(t, flags); } @@ -1464,7 +1477,8 @@ static void __init rcu_spawn_boost_kthreads(void) for_each_possible_cpu(cpu) per_cpu(rcu_cpu_has_work, cpu) = 0; - BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); + if (WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__)) + return; rcu_for_each_leaf_node(rnp) (void)rcu_spawn_one_boost_kthread(rnp); } @@ -2322,7 +2336,8 @@ static int rcu_nocb_kthread(void *arg) tail = rdp->nocb_follower_tail; rdp->nocb_follower_tail = &rdp->nocb_follower_head; raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); - BUG_ON(!list); + if (WARN_ON_ONCE(!list)) + continue; trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeNonEmpty")); /* Each pass through the following loop invokes a callback. */ @@ -2495,7 +2510,8 @@ static void rcu_spawn_one_nocb_kthread(int cpu) /* Spawn the kthread for this CPU. 
*/ t = kthread_run(rcu_nocb_kthread, rdp_spawn, "rcuo%c/%d", rcu_state.abbr, cpu); - BUG_ON(IS_ERR(t)); + if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo kthread, OOM is now expected behavior\n", __func__)) + return; WRITE_ONCE(rdp_spawn->nocb_kthread, t); } diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index f203b94f6b5b..1971869c4072 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -335,8 +335,7 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array, /* Initialize and register callbacks for each crcu_array element. */ for (i = 0; i < n; i++) { if (checktiny && - (crcu_array[i] == call_rcu || - crcu_array[i] == call_rcu_bh)) { + (crcu_array[i] == call_rcu)) { might_sleep(); continue; } @@ -352,8 +351,7 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array, /* Wait for all callbacks to be invoked. */ for (i = 0; i < n; i++) { if (checktiny && - (crcu_array[i] == call_rcu || - crcu_array[i] == call_rcu_bh)) + (crcu_array[i] == call_rcu)) continue; for (j = 0; j < i; j++) if (crcu_array[j] == crcu_array[i]) @@ -822,7 +820,8 @@ static int __init rcu_spawn_tasks_kthread(void) struct task_struct *t; t = kthread_run(rcu_tasks_kthread, NULL, "rcu_tasks_kthread"); - BUG_ON(IS_ERR(t)); + if (WARN_ONCE(IS_ERR(t), "%s: Could not start Tasks-RCU grace-period kthread, OOM is now expected behavior\n", __func__)) + return 0; smp_mb(); /* Ensure others see full kthread. */ WRITE_ONCE(rcu_tasks_kthread_ptr, t); return 0; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f12225f26b70..ea12ebc57840 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5788,7 +5788,7 @@ int sched_cpu_deactivate(unsigned int cpu) * * Do sync before park smpboot threads to take care the rcu boost case. */ - synchronize_rcu_mult(call_rcu, call_rcu_sched); + synchronize_rcu(); if (!sched_smp_initialized) return 0; diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c index 76e0eaf4654e..3cd8a3a795d2 100644 --- a/kernel/sched/membarrier.c +++ b/kernel/sched/membarrier.c @@ -210,7 +210,7 @@ static int membarrier_register_global_expedited(void) * future scheduler executions will observe the new * thread flag state for this mm. */ - synchronize_sched(); + synchronize_rcu(); } atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY, &mm->membarrier_state); @@ -246,7 +246,7 @@ static int membarrier_register_private_expedited(int flags) * Ensure all future scheduler executions will observe the * new thread flag state for this process. */ - synchronize_sched(); + synchronize_rcu(); } atomic_or(state, &mm->membarrier_state); @@ -298,7 +298,7 @@ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags) if (tick_nohz_full_enabled()) return -EINVAL; if (num_online_cpus() > 1) - synchronize_sched(); + synchronize_rcu(); return 0; case MEMBARRIER_CMD_GLOBAL_EXPEDITED: return membarrier_global_expedited(); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index f536f601bd46..5b4f73e4fd56 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -173,7 +173,7 @@ static void ftrace_sync(struct work_struct *work) { /* * This function is just a stub to implement a hard force - * of synchronize_sched(). This requires synchronizing + * of synchronize_rcu(). This requires synchronizing * tasks even in userspace and idle. * * Yes, function tracing is rude. 
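A minimal sketch (not part of this series) of the pattern the call_rcu_sched()/synchronize_sched() -> call_rcu()/synchronize_rcu() conversions in the surrounding hunks rely on: after the flavor consolidation, a preempt-disabled region is a full RCU read-side critical section, so the plain grace-period primitives cover it. The names demo_cfg, demo_cfg_ptr, demo_read_threshold() and demo_set_threshold() are hypothetical, not taken from these patches.

	/* Illustrative only; all "demo_*" identifiers are made up. */
	#include <linux/kernel.h>
	#include <linux/rcupdate.h>
	#include <linux/slab.h>
	#include <linux/spinlock.h>

	struct demo_cfg {
		int threshold;
		struct rcu_head rcu;
	};

	static struct demo_cfg __rcu *demo_cfg_ptr;
	static DEFINE_SPINLOCK(demo_cfg_lock);

	/* Reader: a preempt-disabled section now counts as an RCU reader. */
	static int demo_read_threshold(void)
	{
		struct demo_cfg *cfg;
		int val = -1;

		preempt_disable();
		cfg = rcu_dereference_sched(demo_cfg_ptr);
		if (cfg)
			val = cfg->threshold;
		preempt_enable();
		return val;
	}

	static void demo_cfg_free_rcu(struct rcu_head *rhp)
	{
		kfree(container_of(rhp, struct demo_cfg, rcu));
	}

	/* Updater: publish a replacement, free the old copy after a grace period. */
	static void demo_set_threshold(struct demo_cfg *newcfg)
	{
		struct demo_cfg *old;

		spin_lock(&demo_cfg_lock);
		old = rcu_dereference_protected(demo_cfg_ptr,
						lockdep_is_held(&demo_cfg_lock));
		rcu_assign_pointer(demo_cfg_ptr, newcfg);
		spin_unlock(&demo_cfg_lock);
		if (old)
			call_rcu(&old->rcu, demo_cfg_free_rcu); /* pre-consolidation this needed call_rcu_sched() */
	}
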
@@ -934,7 +934,7 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf, ftrace_profile_enabled = 0; /* * unregister_ftrace_profiler calls stop_machine - * so this acts like an synchronize_sched. + * so this acts like an synchronize_rcu. */ unregister_ftrace_profiler(); } @@ -1086,7 +1086,7 @@ struct ftrace_ops *ftrace_ops_trampoline(unsigned long addr) /* * Some of the ops may be dynamically allocated, - * they are freed after a synchronize_sched(). + * they are freed after a synchronize_rcu(). */ preempt_disable_notrace(); @@ -1286,7 +1286,7 @@ static void free_ftrace_hash_rcu(struct ftrace_hash *hash) { if (!hash || hash == EMPTY_HASH) return; - call_rcu_sched(&hash->rcu, __free_ftrace_hash_rcu); + call_rcu(&hash->rcu, __free_ftrace_hash_rcu); } void ftrace_free_filter(struct ftrace_ops *ops) @@ -1501,7 +1501,7 @@ static bool hash_contains_ip(unsigned long ip, * the ip is not in the ops->notrace_hash. * * This needs to be called with preemption disabled as - * the hashes are freed with call_rcu_sched(). + * the hashes are freed with call_rcu(). */ static int ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) @@ -4496,7 +4496,7 @@ unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr, if (ftrace_enabled && !ftrace_hash_empty(hash)) ftrace_run_modify_code(&probe->ops, FTRACE_UPDATE_CALLS, &old_hash_ops); - synchronize_sched(); + synchronize_rcu(); hlist_for_each_entry_safe(entry, tmp, &hhd, hlist) { hlist_del(&entry->hlist); @@ -5314,7 +5314,7 @@ ftrace_graph_release(struct inode *inode, struct file *file) mutex_unlock(&graph_lock); /* Wait till all users are no longer using the old hash */ - synchronize_sched(); + synchronize_rcu(); free_ftrace_hash(old_hash); } @@ -5707,7 +5707,7 @@ void ftrace_release_mod(struct module *mod) list_for_each_entry_safe(mod_map, n, &ftrace_mod_maps, list) { if (mod_map->mod == mod) { list_del_rcu(&mod_map->list); - call_rcu_sched(&mod_map->rcu, ftrace_free_mod_map); + call_rcu(&mod_map->rcu, ftrace_free_mod_map); break; } } @@ -5927,7 +5927,7 @@ ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, struct ftrace_mod_map *mod_map; const char *ret = NULL; - /* mod_map is freed via call_rcu_sched() */ + /* mod_map is freed via call_rcu() */ preempt_disable(); list_for_each_entry_rcu(mod_map, &ftrace_mod_maps, list) { ret = ftrace_func_address_lookup(mod_map, addr, size, off, sym); @@ -6262,7 +6262,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, /* * Some of the ops may be dynamically allocated, - * they must be freed after a synchronize_sched(). + * they must be freed after a synchronize_rcu(). 
*/ preempt_disable_notrace(); @@ -6433,7 +6433,7 @@ static void clear_ftrace_pids(struct trace_array *tr) rcu_assign_pointer(tr->function_pids, NULL); /* Wait till all users are no longer using pid filtering */ - synchronize_sched(); + synchronize_rcu(); trace_free_pid_list(pid_list); } @@ -6580,7 +6580,7 @@ ftrace_pid_write(struct file *filp, const char __user *ubuf, rcu_assign_pointer(tr->function_pids, pid_list); if (filtered_pids) { - synchronize_sched(); + synchronize_rcu(); trace_free_pid_list(filtered_pids); } else if (pid_list) { /* Register a probe to set whether to ignore the tracing of a task */ diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 65bd4616220d..4f3247a53259 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1834,7 +1834,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, * There could have been a race between checking * record_disable and incrementing it. */ - synchronize_sched(); + synchronize_rcu(); for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; rb_check_pages(cpu_buffer); @@ -3151,7 +3151,7 @@ static bool rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) * This prevents all writes to the buffer. Any attempt to write * to the buffer after this will fail and return NULL. * - * The caller should call synchronize_sched() after this. + * The caller should call synchronize_rcu() after this. */ void ring_buffer_record_disable(struct ring_buffer *buffer) { @@ -3253,7 +3253,7 @@ bool ring_buffer_record_is_set_on(struct ring_buffer *buffer) * This prevents all writes to the buffer. Any attempt to write * to the buffer after this will fail and return NULL. * - * The caller should call synchronize_sched() after this. + * The caller should call synchronize_rcu() after this. */ void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu) { @@ -4191,7 +4191,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_read_prepare); void ring_buffer_read_prepare_sync(void) { - synchronize_sched(); + synchronize_rcu(); } EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync); @@ -4363,7 +4363,7 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) atomic_inc(&cpu_buffer->record_disabled); /* Make sure all commits have finished */ - synchronize_sched(); + synchronize_rcu(); raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); @@ -4496,7 +4496,7 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, goto out; /* - * We can't do a synchronize_sched here because this + * We can't do a synchronize_rcu here because this * function can be called in atomic context. * Normally this will be called from the same CPU as cpu. * If not it's up to the caller to protect this. 
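A minimal sketch (not part of this series) of the sequence the updated ring-buffer comments call for: disable recording, wait one consolidated-RCU grace period so commits that raced with the disable drain, then reset and re-enable. tracing_reset() in the trace.c hunks just below has this exact shape; the wrapper name demo_reset_cpu_buffer() is hypothetical.

	/* Illustrative only; the wrapper name is made up. */
	#include <linux/rcupdate.h>
	#include <linux/ring_buffer.h>

	static void demo_reset_cpu_buffer(struct ring_buffer *buffer, int cpu)
	{
		ring_buffer_record_disable(buffer);

		/* Wait for commits that raced with the disable to finish. */
		synchronize_rcu();

		ring_buffer_reset_cpu(buffer, cpu);
		ring_buffer_record_enable(buffer);
	}
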
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ff1c4b20cd0a..51612b4a603f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1681,7 +1681,7 @@ void tracing_reset(struct trace_buffer *buf, int cpu) ring_buffer_record_disable(buffer); /* Make sure all commits have finished */ - synchronize_sched(); + synchronize_rcu(); ring_buffer_reset_cpu(buffer, cpu); ring_buffer_record_enable(buffer); @@ -1698,7 +1698,7 @@ void tracing_reset_online_cpus(struct trace_buffer *buf) ring_buffer_record_disable(buffer); /* Make sure all commits have finished */ - synchronize_sched(); + synchronize_rcu(); buf->time_start = buffer_ftrace_now(buf, buf->cpu); @@ -2250,7 +2250,7 @@ void trace_buffered_event_disable(void) preempt_enable(); /* Wait for all current users to finish */ - synchronize_sched(); + synchronize_rcu(); for_each_tracing_cpu(cpu) { free_page((unsigned long)per_cpu(trace_buffered_event, cpu)); @@ -5398,7 +5398,7 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf) if (tr->current_trace->reset) tr->current_trace->reset(tr); - /* Current trace needs to be nop_trace before synchronize_sched */ + /* Current trace needs to be nop_trace before synchronize_rcu */ tr->current_trace = &nop_trace; #ifdef CONFIG_TRACER_MAX_TRACE @@ -5412,7 +5412,7 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf) * The update_max_tr is called from interrupts disabled * so a synchronized_sched() is sufficient. */ - synchronize_sched(); + synchronize_rcu(); free_snapshot(tr); } #endif diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 84a65173b1e9..35f3aa55be85 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1614,7 +1614,7 @@ static int process_system_preds(struct trace_subsystem_dir *dir, /* * The calls can still be using the old filters. - * Do a synchronize_sched() and to ensure all calls are + * Do a synchronize_rcu() and to ensure all calls are * done with them before we free them. */ tracepoint_synchronize_unregister(); @@ -1845,7 +1845,7 @@ int apply_subsystem_event_filter(struct trace_subsystem_dir *dir, if (filter) { /* * No event actually uses the system filter - * we can free it without synchronize_sched(). + * we can free it without synchronize_rcu(). */ __free_filter(system->filter); system->filter = filter; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index fec67188c4d2..adc153ab51c0 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -333,7 +333,7 @@ disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) * event_call related objects, which will be accessed in * the kprobe_trace_func/kretprobe_trace_func. */ - synchronize_sched(); + synchronize_rcu(); kfree(link); /* Ignored if link == NULL */ } diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index a3be42304485..46f2ab1e08a9 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -92,7 +92,7 @@ static __init int release_early_probes(void) while (early_probes) { tmp = early_probes; early_probes = tmp->next; - call_rcu_sched(tmp, rcu_free_old_probes); + call_rcu(tmp, rcu_free_old_probes); } return 0; @@ -123,7 +123,7 @@ static inline void release_probes(struct tracepoint_func *old) * cover both cases. So let us chain the SRCU and sched RCU * callbacks to wait for both grace periods. 
*/ - call_rcu_sched(&tp_probes->rcu, rcu_free_old_probes); + call_rcu(&tp_probes->rcu, rcu_free_old_probes); } } diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 0280deac392e..392be4b252f6 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3396,7 +3396,7 @@ static void put_unbound_pool(struct worker_pool *pool) del_timer_sync(&pool->mayday_timer); /* sched-RCU protected to allow dereferences from get_work_pool() */ - call_rcu_sched(&pool->rcu, rcu_free_pool); + call_rcu(&pool->rcu, rcu_free_pool); } /** @@ -3503,14 +3503,14 @@ static void pwq_unbound_release_workfn(struct work_struct *work) put_unbound_pool(pool); mutex_unlock(&wq_pool_mutex); - call_rcu_sched(&pwq->rcu, rcu_free_pwq); + call_rcu(&pwq->rcu, rcu_free_pwq); /* * If we're the last pwq going away, @wq is already dead and no one * is gonna access it anymore. Schedule RCU free. */ if (is_last) - call_rcu_sched(&wq->rcu, rcu_free_wq); + call_rcu(&wq->rcu, rcu_free_wq); } /** @@ -4195,7 +4195,7 @@ void destroy_workqueue(struct workqueue_struct *wq) * The base ref is never dropped on per-cpu pwqs. Directly * schedule RCU free. */ - call_rcu_sched(&wq->rcu, rcu_free_wq); + call_rcu(&wq->rcu, rcu_free_wq); } else { /* * We're the sole accessor of @wq at this point. Directly diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index de10b8c0bff6..9877682e49c7 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -181,7 +181,7 @@ static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref, ref->confirm_switch = confirm_switch ?: percpu_ref_noop_confirm_switch; percpu_ref_get(ref); /* put after confirmation */ - call_rcu_sched(&ref->rcu, percpu_ref_switch_to_atomic_rcu); + call_rcu(&ref->rcu, percpu_ref_switch_to_atomic_rcu); } static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index c13625c1ad5e..7b86600a47c9 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1225,7 +1225,7 @@ static void collect_mm_slot(struct mm_slot *mm_slot) { struct mm_struct *mm = mm_slot->mm; - VM_BUG_ON(NR_CPUS != 1 && !spin_is_locked(&khugepaged_mm_lock)); + lockdep_assert_held(&khugepaged_mm_lock); if (khugepaged_test_exit(mm)) { /* free mm_slot */ @@ -1631,7 +1631,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, int progress = 0; VM_BUG_ON(!pages); - VM_BUG_ON(NR_CPUS != 1 && !spin_is_locked(&khugepaged_mm_lock)); + lockdep_assert_held(&khugepaged_mm_lock); if (khugepaged_scan.mm_slot) mm_slot = khugepaged_scan.mm_slot; diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index 2a9fbc4a37d5..f2f03c655807 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -199,7 +199,7 @@ void tlb_table_flush(struct mmu_gather *tlb) if (*batch) { tlb_table_invalidate(tlb); - call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); + call_rcu(&(*batch)->rcu, tlb_remove_table_rcu); *batch = NULL; } } diff --git a/mm/slab.c b/mm/slab.c index 2a5654bb3b3f..3abb9feb3818 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -962,10 +962,10 @@ static int setup_kmem_cache_node(struct kmem_cache *cachep, * To protect lockless access to n->shared during irq disabled context. * If n->shared isn't NULL in irq disabled context, accessing to it is * guaranteed to be valid until irq is re-enabled, because it will be - * freed after synchronize_sched(). + * freed after synchronize_rcu(). 
*/ if (old_shared && force_change) - synchronize_sched(); + synchronize_rcu(); fail: kfree(old_shared); diff --git a/mm/slab_common.c b/mm/slab_common.c index 7eb8dc136c1c..9c11e8a937d2 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -724,7 +724,7 @@ void slab_deactivate_memcg_cache_rcu_sched(struct kmem_cache *s, css_get(&s->memcg_params.memcg->css); s->memcg_params.deact_fn = deact_fn; - call_rcu_sched(&s->memcg_params.deact_rcu_head, kmemcg_deactivate_rcufn); + call_rcu(&s->memcg_params.deact_rcu_head, kmemcg_deactivate_rcufn); } void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg) @@ -839,11 +839,11 @@ static void flush_memcg_workqueue(struct kmem_cache *s) mutex_unlock(&slab_mutex); /* - * SLUB deactivates the kmem_caches through call_rcu_sched. Make + * SLUB deactivates the kmem_caches through call_rcu. Make * sure all registered rcu callbacks have been invoked. */ if (IS_ENABLED(CONFIG_SLUB)) - rcu_barrier_sched(); + rcu_barrier(); /* * SLAB and SLUB create memcg kmem_caches through workqueue and SLUB diff --git a/mm/swap.c b/mm/swap.c index aa483719922e..5d786019eab9 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -823,8 +823,7 @@ void lru_add_page_tail(struct page *page, struct page *page_tail, VM_BUG_ON_PAGE(!PageHead(page), page); VM_BUG_ON_PAGE(PageCompound(page_tail), page); VM_BUG_ON_PAGE(PageLRU(page_tail), page); - VM_BUG_ON(NR_CPUS != 1 && - !spin_is_locked(&lruvec_pgdat(lruvec)->lru_lock)); + lockdep_assert_held(&lruvec_pgdat(lruvec)->lru_lock); if (!list) SetPageLRU(page_tail); diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index a7ea2d431714..596ec6e7df11 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -728,7 +728,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) rcu_assign_pointer(*pp, p->next); hlist_del_init(&p->mglist); del_timer(&p->timer); - call_rcu_bh(&p->rcu, br_multicast_free_pg); + call_rcu(&p->rcu, br_multicast_free_pg); err = 0; if (!mp->ports && !mp->host_joined && diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 6bac0d6b7b94..0255223f2001 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -260,7 +260,7 @@ static void br_multicast_group_expired(struct timer_list *t) hlist_del_rcu(&mp->hlist[mdb->ver]); mdb->size--; - call_rcu_bh(&mp->rcu, br_multicast_free_group); + call_rcu(&mp->rcu, br_multicast_free_group); out: spin_unlock(&br->multicast_lock); @@ -291,7 +291,7 @@ static void br_multicast_del_pg(struct net_bridge *br, del_timer(&p->timer); br_mdb_notify(br->dev, p->port, &pg->addr, RTM_DELMDB, p->flags); - call_rcu_bh(&p->rcu, br_multicast_free_pg); + call_rcu(&p->rcu, br_multicast_free_pg); if (!mp->ports && !mp->host_joined && netif_running(br->dev)) @@ -358,7 +358,7 @@ static int br_mdb_rehash(struct net_bridge_mdb_htable __rcu **mdbp, int max, } br_mdb_rehash_seq++; - call_rcu_bh(&mdb->rcu, br_mdb_free); + call_rcu(&mdb->rcu, br_mdb_free); out: rcu_assign_pointer(*mdbp, mdb); @@ -1629,7 +1629,7 @@ br_multicast_leave_group(struct net_bridge *br, rcu_assign_pointer(*pp, p->next); hlist_del_init(&p->mglist); del_timer(&p->timer); - call_rcu_bh(&p->rcu, br_multicast_free_pg); + call_rcu(&p->rcu, br_multicast_free_pg); br_mdb_notify(br->dev, port, group, RTM_DELMDB, p->flags); @@ -2051,19 +2051,19 @@ void br_multicast_dev_del(struct net_bridge *br) hlist_for_each_entry_safe(mp, n, &mdb->mhash[i], hlist[ver]) { del_timer(&mp->timer); - call_rcu_bh(&mp->rcu, br_multicast_free_group); + call_rcu(&mp->rcu, br_multicast_free_group); } } if (mdb->old) { 
spin_unlock_bh(&br->multicast_lock); - rcu_barrier_bh(); + rcu_barrier(); spin_lock_bh(&br->multicast_lock); WARN_ON(mdb->old); } mdb->old = mdb; - call_rcu_bh(&mdb->rcu, br_mdb_free); + call_rcu(&mdb->rcu, br_mdb_free); out: spin_unlock_bh(&br->multicast_lock); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 5da9552b186b..677d3f332172 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -800,7 +800,7 @@ void __netpoll_cleanup(struct netpoll *np) ops->ndo_netpoll_cleanup(np->dev); RCU_INIT_POINTER(np->dev->npinfo, NULL); - call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info); + call_rcu(&npinfo->rcu, rcu_cleanup_netpoll_info); } else RCU_INIT_POINTER(np->dev->npinfo, NULL); } @@ -811,7 +811,7 @@ void __netpoll_free(struct netpoll *np) ASSERT_RTNL(); /* Wait for transmitting packets to finish before freeing. */ - synchronize_rcu_bh(); + synchronize_rcu(); __netpoll_cleanup(np); kfree(np); } diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 56a99d0c9aa0..c92d6ccce610 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -580,7 +580,7 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock) write_unlock_bh(&sk->sk_callback_lock); sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED); - call_rcu_sched(&psock->rcu, sk_psock_destroy); + call_rcu(&psock->rcu, sk_psock_destroy); } EXPORT_SYMBOL_GPL(sk_psock_drop); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 7d6ff983ba2c..dbd0f7bae00a 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -2405,7 +2405,7 @@ static void __exit decnet_exit(void) proto_unregister(&dn_proto); - rcu_barrier_bh(); /* Wait for completion of call_rcu_bh()'s */ + rcu_barrier(); /* Wait for completion of call_rcu()'s */ } module_exit(decnet_exit); #endif diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index ca3b0f46de53..016e628c6ac9 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -540,7 +540,7 @@ void qdisc_put_stab(struct qdisc_size_table *tab) if (--tab->refcnt == 0) { list_del(&tab->list); - call_rcu_bh(&tab->rcu, stab_kfree_rcu); + call_rcu(&tab->rcu, stab_kfree_rcu); } } EXPORT_SYMBOL(qdisc_put_stab); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index de1663f7d3ad..66ba2ce2320f 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1372,7 +1372,7 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, if (!tp_head) { RCU_INIT_POINTER(*miniqp->p_miniq, NULL); /* Wait for flying RCU callback before it is freed. */ - rcu_barrier_bh(); + rcu_barrier(); return; } @@ -1380,10 +1380,10 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, &miniqp->miniq1 : &miniqp->miniq2; /* We need to make sure that readers won't see the miniq - * we are about to modify. So wait until previous call_rcu_bh callback + * we are about to modify. So wait until previous call_rcu callback * is done. */ - rcu_barrier_bh(); + rcu_barrier(); miniq->filter_list = tp_head; rcu_assign_pointer(*miniqp->p_miniq, miniq); @@ -1392,7 +1392,7 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, * block potential new user of miniq_old until all readers * are not seeing it. 
*/ - call_rcu_bh(&miniq_old->rcu, mini_qdisc_rcu_func); + call_rcu(&miniq_old->rcu, mini_qdisc_rcu_func); } EXPORT_SYMBOL(mini_qdisc_pair_swap); diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index c883ec55654f..377f373db6c0 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -573,6 +573,27 @@ foreach my $entry (@mode_permission_funcs) { } $mode_perms_search = "(?:${mode_perms_search})"; +our %deprecated_apis = ( + "synchronize_rcu_bh" => "synchronize_rcu", + "synchronize_rcu_bh_expedited" => "synchronize_rcu_expedited", + "call_rcu_bh" => "call_rcu", + "rcu_barrier_bh" => "rcu_barrier", + "synchronize_sched" => "synchronize_rcu", + "synchronize_sched_expedited" => "synchronize_rcu_expedited", + "call_rcu_sched" => "call_rcu", + "rcu_barrier_sched" => "rcu_barrier", + "get_state_synchronize_sched" => "get_state_synchronize_rcu", + "cond_synchronize_sched" => "cond_synchronize_rcu", +); + +#Create a search pattern for all these strings to speed up a loop below +our $deprecated_apis_search = ""; +foreach my $entry (keys %deprecated_apis) { + $deprecated_apis_search .= '|' if ($deprecated_apis_search ne ""); + $deprecated_apis_search .= $entry; +} +$deprecated_apis_search = "(?:${deprecated_apis_search})"; + our $mode_perms_world_writable = qr{ S_IWUGO | S_IWOTH | @@ -6368,6 +6389,20 @@ sub process { "please use device_initcall() or more appropriate function instead of __initcall() (see include/linux/init.h)\n" . $herecurr); } +# check for spin_is_locked(), suggest lockdep instead + if ($line =~ /\bspin_is_locked\(/) { + WARN("USE_LOCKDEP", + "Where possible, use lockdep_assert_held instead of assertions based on spin_is_locked\n" . $herecurr); + } + +# check for deprecated apis + if ($line =~ /\b($deprecated_apis_search)\b\s*\(/) { + my $deprecated_api = $1; + my $new_api = $deprecated_apis{$deprecated_api}; + WARN("DEPRECATED_API", + "Deprecated use of '$deprecated_api', prefer '$new_api' instead\n" . $herecurr); + } + # check for various structs that are normally const (ops, kgdb, device_tree) # and avoid what seem like struct definitions 'struct foo {' if ($line !~ /\bconst\b/ && diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h index 6935ef94e77a..857d9e22826e 100644 --- a/tools/include/linux/kernel.h +++ b/tools/include/linux/kernel.h @@ -116,6 +116,6 @@ int scnprintf(char * buf, size_t size, const char * fmt, ...); #define round_down(x, y) ((x) & ~__round_mask(x, y)) #define current_gfp_context(k) 0 -#define synchronize_sched() +#define synchronize_rcu() #endif diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 5a7a62d76a50..19864f1cb27a 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -194,6 +194,14 @@ do shift done +if test -z "$TORTURE_INITRD" || tools/testing/selftests/rcutorture/bin/mkinitrd.sh +then + : +else + echo No initrd and unable to create one, aborting test >&2 + exit 1 +fi + CONFIGFRAG=${KVM}/configs/${TORTURE_SUITE}; export CONFIGFRAG if test -z "$configs" diff --git a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh new file mode 100755 index 000000000000..da298394daa2 --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh @@ -0,0 +1,136 @@ +#!/bin/bash +# +# Create an initrd directory if one does not already exist. 
+# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# Copyright (C) IBM Corporation, 2013 +# +# Author: Connor Shu <Connor.Shu@ibm.com> + +D=tools/testing/selftests/rcutorture + +# Prerequisite checks +[ -z "$D" ] && echo >&2 "No argument supplied" && exit 1 +if [ ! -d "$D" ]; then + echo >&2 "$D does not exist: Malformed kernel source tree?" + exit 1 +fi +if [ -s "$D/initrd/init" ]; then + echo "$D/initrd/init already exists, no need to create it" + exit 0 +fi + +T=${TMPDIR-/tmp}/mkinitrd.sh.$$ +trap 'rm -rf $T' 0 2 +mkdir $T + +cat > $T/init << '__EOF___' +#!/bin/sh +# Run in userspace a few milliseconds every second. This helps to +# exercise the NO_HZ_FULL portions of RCU. +while : +do + q= + for i in \ + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a \ + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a \ + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a \ + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a \ + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a \ + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + do + q="$q $i" + done + sleep 1 +done +__EOF___ + +# Try using dracut to create initrd +if command -v dracut >/dev/null 2>&1 +then + echo Creating $D/initrd using dracut. + # Filesystem creation + dracut --force --no-hostonly --no-hostonly-cmdline --module "base" $T/initramfs.img + cd $D + mkdir -p initrd + cd initrd + zcat $T/initramfs.img | cpio -id + cp $T/init init + chmod +x init + echo Done creating $D/initrd using dracut + exit 0 +fi + +# No dracut, so create a C-language initrd/init program and statically +# link it. This results in a very small initrd, but might be a bit less +# future-proof than dracut. +echo "Could not find dracut, attempting C initrd" +cd $D +mkdir -p initrd +cd initrd +cat > init.c << '___EOF___' +#ifndef NOLIBC +#include <unistd.h> +#include <sys/time.h> +#endif + +volatile unsigned long delaycount; + +int main(int argc, int argv[]) +{ + int i; + struct timeval tv; + struct timeval tvb; + + for (;;) { + sleep(1); + /* Need some userspace time. */ + if (gettimeofday(&tvb, NULL)) + continue; + do { + for (i = 0; i < 1000 * 100; i++) + delaycount = i * i; + if (gettimeofday(&tv, NULL)) + break; + tv.tv_sec -= tvb.tv_sec; + if (tv.tv_sec > 1) + break; + tv.tv_usec += tv.tv_sec * 1000 * 1000; + tv.tv_usec -= tvb.tv_usec; + } while (tv.tv_usec < 1000); + } + return 0; +} +___EOF___ + +# build using nolibc on supported archs (smaller executable) and fall +# back to regular glibc on other ones. 
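The architecture probe in the next few script lines is terse, so a rough equivalent may help: it amounts to preprocessing a snippet like the one below with the target compiler and checking whether the bare token "yes" survives, which indicates one of the architectures nolibc.h supports; otherwise the script falls back to a regular static glibc link. This snippet is only an illustration of what the echo | gcc -E | grep pipeline feeds to the preprocessor, not part of mkinitrd.sh.

    /* Illustrative equivalent of the shell probe below; not part of the script. */
    #if __x86_64__ || __i386__ || __i486__ || __i586__ || __i686__ \
        || __ARM_EABI__ || __aarch64__
    yes
    #endif
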
+if echo -e "#if __x86_64__||__i386__||__i486__||__i586__||__i686__" \ + "||__ARM_EABI__||__aarch64__\nyes\n#endif" \ + | ${CROSS_COMPILE}gcc -E -nostdlib -xc - \ + | grep -q '^yes'; then + # architecture supported by nolibc + ${CROSS_COMPILE}gcc -fno-asynchronous-unwind-tables -fno-ident \ + -nostdlib -include ../bin/nolibc.h -lgcc -s -static -Os \ + -o init init.c +else + ${CROSS_COMPILE}gcc -s -static -Os -o init init.c +fi + +rm init.c +echo "Done creating a statically linked C-language initrd" + +exit 0 diff --git a/tools/testing/selftests/rcutorture/bin/nolibc.h b/tools/testing/selftests/rcutorture/bin/nolibc.h new file mode 100644 index 000000000000..f98f5b92d3eb --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/nolibc.h @@ -0,0 +1,2197 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* nolibc.h + * Copyright (C) 2017-2018 Willy Tarreau <w@1wt.eu> + */ + +/* some archs (at least aarch64) don't expose the regular syscalls anymore by + * default, either because they have an "_at" replacement, or because there are + * more modern alternatives. For now we'd rather still use them. + */ +#define __ARCH_WANT_SYSCALL_NO_AT +#define __ARCH_WANT_SYSCALL_NO_FLAGS +#define __ARCH_WANT_SYSCALL_DEPRECATED + +#include <asm/unistd.h> +#include <asm/ioctls.h> +#include <asm/errno.h> +#include <linux/fs.h> +#include <linux/loop.h> + +#define NOLIBC + +/* Build a static executable this way : + * $ gcc -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \ + * -static -include nolibc.h -lgcc -o hello hello.c + * + * Useful calling convention table found here : + * http://man7.org/linux/man-pages/man2/syscall.2.html + * + * This doc is even better : + * https://w3challs.com/syscalls/ + */ + + +/* this way it will be removed if unused */ +static int errno; + +#ifndef NOLIBC_IGNORE_ERRNO +#define SET_ERRNO(v) do { errno = (v); } while (0) +#else +#define SET_ERRNO(v) do { } while (0) +#endif + +/* errno codes all ensure that they will not conflict with a valid pointer + * because they all correspond to the highest addressable memry page. + */ +#define MAX_ERRNO 4095 + +/* Declare a few quite common macros and types that usually are in stdlib.h, + * stdint.h, ctype.h, unistd.h and a few other common locations. 
+ */ + +#define NULL ((void *)0) + +/* stdint types */ +typedef unsigned char uint8_t; +typedef signed char int8_t; +typedef unsigned short uint16_t; +typedef signed short int16_t; +typedef unsigned int uint32_t; +typedef signed int int32_t; +typedef unsigned long long uint64_t; +typedef signed long long int64_t; +typedef unsigned long size_t; +typedef signed long ssize_t; +typedef unsigned long uintptr_t; +typedef signed long intptr_t; +typedef signed long ptrdiff_t; + +/* for stat() */ +typedef unsigned int dev_t; +typedef unsigned long ino_t; +typedef unsigned int mode_t; +typedef signed int pid_t; +typedef unsigned int uid_t; +typedef unsigned int gid_t; +typedef unsigned long nlink_t; +typedef signed long off_t; +typedef signed long blksize_t; +typedef signed long blkcnt_t; +typedef signed long time_t; + +/* for poll() */ +struct pollfd { + int fd; + short int events; + short int revents; +}; + +/* for select() */ +struct timeval { + long tv_sec; + long tv_usec; +}; + +/* for pselect() */ +struct timespec { + long tv_sec; + long tv_nsec; +}; + +/* for gettimeofday() */ +struct timezone { + int tz_minuteswest; + int tz_dsttime; +}; + +/* for getdents64() */ +struct linux_dirent64 { + uint64_t d_ino; + int64_t d_off; + unsigned short d_reclen; + unsigned char d_type; + char d_name[]; +}; + +/* commonly an fd_set represents 256 FDs */ +#define FD_SETSIZE 256 +typedef struct { uint32_t fd32[FD_SETSIZE/32]; } fd_set; + +/* needed by wait4() */ +struct rusage { + struct timeval ru_utime; + struct timeval ru_stime; + long ru_maxrss; + long ru_ixrss; + long ru_idrss; + long ru_isrss; + long ru_minflt; + long ru_majflt; + long ru_nswap; + long ru_inblock; + long ru_oublock; + long ru_msgsnd; + long ru_msgrcv; + long ru_nsignals; + long ru_nvcsw; + long ru_nivcsw; +}; + +/* stat flags (WARNING, octal here) */ +#define S_IFDIR 0040000 +#define S_IFCHR 0020000 +#define S_IFBLK 0060000 +#define S_IFREG 0100000 +#define S_IFIFO 0010000 +#define S_IFLNK 0120000 +#define S_IFSOCK 0140000 +#define S_IFMT 0170000 + +#define S_ISDIR(mode) (((mode) & S_IFDIR) == S_IFDIR) +#define S_ISCHR(mode) (((mode) & S_IFCHR) == S_IFCHR) +#define S_ISBLK(mode) (((mode) & S_IFBLK) == S_IFBLK) +#define S_ISREG(mode) (((mode) & S_IFREG) == S_IFREG) +#define S_ISFIFO(mode) (((mode) & S_IFIFO) == S_IFIFO) +#define S_ISLNK(mode) (((mode) & S_IFLNK) == S_IFLNK) +#define S_ISSOCK(mode) (((mode) & S_IFSOCK) == S_IFSOCK) + +#define DT_UNKNOWN 0 +#define DT_FIFO 1 +#define DT_CHR 2 +#define DT_DIR 4 +#define DT_BLK 6 +#define DT_REG 8 +#define DT_LNK 10 +#define DT_SOCK 12 + +/* all the *at functions */ +#ifndef AT_FDWCD +#define AT_FDCWD -100 +#endif + +/* lseek */ +#define SEEK_SET 0 +#define SEEK_CUR 1 +#define SEEK_END 2 + +/* reboot */ +#define LINUX_REBOOT_MAGIC1 0xfee1dead +#define LINUX_REBOOT_MAGIC2 0x28121969 +#define LINUX_REBOOT_CMD_HALT 0xcdef0123 +#define LINUX_REBOOT_CMD_POWER_OFF 0x4321fedc +#define LINUX_REBOOT_CMD_RESTART 0x01234567 +#define LINUX_REBOOT_CMD_SW_SUSPEND 0xd000fce2 + + +/* The format of the struct as returned by the libc to the application, which + * significantly differs from the format returned by the stat() syscall flavours. 
+ */ +struct stat { + dev_t st_dev; /* ID of device containing file */ + ino_t st_ino; /* inode number */ + mode_t st_mode; /* protection */ + nlink_t st_nlink; /* number of hard links */ + uid_t st_uid; /* user ID of owner */ + gid_t st_gid; /* group ID of owner */ + dev_t st_rdev; /* device ID (if special file) */ + off_t st_size; /* total size, in bytes */ + blksize_t st_blksize; /* blocksize for file system I/O */ + blkcnt_t st_blocks; /* number of 512B blocks allocated */ + time_t st_atime; /* time of last access */ + time_t st_mtime; /* time of last modification */ + time_t st_ctime; /* time of last status change */ +}; + +#define WEXITSTATUS(status) (((status) & 0xff00) >> 8) +#define WIFEXITED(status) (((status) & 0x7f) == 0) + + +/* Below comes the architecture-specific code. For each architecture, we have + * the syscall declarations and the _start code definition. This is the only + * global part. On all architectures the kernel puts everything in the stack + * before jumping to _start just above us, without any return address (_start + * is not a function but an entry pint). So at the stack pointer we find argc. + * Then argv[] begins, and ends at the first NULL. Then we have envp which + * starts and ends with a NULL as well. So envp=argv+argc+1. + */ + +#if defined(__x86_64__) +/* Syscalls for x86_64 : + * - registers are 64-bit + * - syscall number is passed in rax + * - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively + * - the system call is performed by calling the syscall instruction + * - syscall return comes in rax + * - rcx and r8..r11 may be clobbered, others are preserved. + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + * - the syscall number is always specified last in order to allow to force + * some registers before (gcc refuses a %-register at the last position). 
+ */ + +#define my_syscall0(num) \ +({ \ + long _ret; \ + register long _num asm("rax") = (num); \ + \ + asm volatile ( \ + "syscall\n" \ + : "=a" (_ret) \ + : "0"(_num) \ + : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + long _ret; \ + register long _num asm("rax") = (num); \ + register long _arg1 asm("rdi") = (long)(arg1); \ + \ + asm volatile ( \ + "syscall\n" \ + : "=a" (_ret) \ + : "r"(_arg1), \ + "0"(_num) \ + : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + long _ret; \ + register long _num asm("rax") = (num); \ + register long _arg1 asm("rdi") = (long)(arg1); \ + register long _arg2 asm("rsi") = (long)(arg2); \ + \ + asm volatile ( \ + "syscall\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), \ + "0"(_num) \ + : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + long _ret; \ + register long _num asm("rax") = (num); \ + register long _arg1 asm("rdi") = (long)(arg1); \ + register long _arg2 asm("rsi") = (long)(arg2); \ + register long _arg3 asm("rdx") = (long)(arg3); \ + \ + asm volatile ( \ + "syscall\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ + "0"(_num) \ + : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + long _ret; \ + register long _num asm("rax") = (num); \ + register long _arg1 asm("rdi") = (long)(arg1); \ + register long _arg2 asm("rsi") = (long)(arg2); \ + register long _arg3 asm("rdx") = (long)(arg3); \ + register long _arg4 asm("r10") = (long)(arg4); \ + \ + asm volatile ( \ + "syscall\n" \ + : "=a" (_ret), "=r"(_arg4) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "0"(_num) \ + : "rcx", "r8", "r9", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + long _ret; \ + register long _num asm("rax") = (num); \ + register long _arg1 asm("rdi") = (long)(arg1); \ + register long _arg2 asm("rsi") = (long)(arg2); \ + register long _arg3 asm("rdx") = (long)(arg3); \ + register long _arg4 asm("r10") = (long)(arg4); \ + register long _arg5 asm("r8") = (long)(arg5); \ + \ + asm volatile ( \ + "syscall\n" \ + : "=a" (_ret), "=r"(_arg4), "=r"(_arg5) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "0"(_num) \ + : "rcx", "r9", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + long _ret; \ + register long _num asm("rax") = (num); \ + register long _arg1 asm("rdi") = (long)(arg1); \ + register long _arg2 asm("rsi") = (long)(arg2); \ + register long _arg3 asm("rdx") = (long)(arg3); \ + register long _arg4 asm("r10") = (long)(arg4); \ + register long _arg5 asm("r8") = (long)(arg5); \ + register long _arg6 asm("r9") = (long)(arg6); \ + \ + asm volatile ( \ + "syscall\n" \ + : "=a" (_ret), "=r"(_arg4), "=r"(_arg5) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_arg6), "0"(_num) \ + : "rcx", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +/* startup code */ +asm(".section .text\n" + ".global _start\n" + "_start:\n" + "pop %rdi\n" // argc (first arg, %rdi) + "mov %rsp, %rsi\n" // argv[] (second arg, %rsi) + "lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx) + "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned when + "sub $8, %rsp\n" // entering the callee + 
"call main\n" // main() returns the status code, we'll exit with it. + "movzb %al, %rdi\n" // retrieve exit code from 8 lower bits + "mov $60, %rax\n" // NR_exit == 60 + "syscall\n" // really exit + "hlt\n" // ensure it does not return + ""); + +/* fcntl / open */ +#define O_RDONLY 0 +#define O_WRONLY 1 +#define O_RDWR 2 +#define O_CREAT 0x40 +#define O_EXCL 0x80 +#define O_NOCTTY 0x100 +#define O_TRUNC 0x200 +#define O_APPEND 0x400 +#define O_NONBLOCK 0x800 +#define O_DIRECTORY 0x10000 + +/* The struct returned by the stat() syscall, equivalent to stat64(). The + * syscall returns 116 bytes and stops in the middle of __unused. + */ +struct sys_stat_struct { + unsigned long st_dev; + unsigned long st_ino; + unsigned long st_nlink; + unsigned int st_mode; + unsigned int st_uid; + + unsigned int st_gid; + unsigned int __pad0; + unsigned long st_rdev; + long st_size; + long st_blksize; + + long st_blocks; + unsigned long st_atime; + unsigned long st_atime_nsec; + unsigned long st_mtime; + + unsigned long st_mtime_nsec; + unsigned long st_ctime; + unsigned long st_ctime_nsec; + long __unused[3]; +}; + +#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) +/* Syscalls for i386 : + * - mostly similar to x86_64 + * - registers are 32-bit + * - syscall number is passed in eax + * - arguments are in ebx, ecx, edx, esi, edi, ebp respectively + * - all registers are preserved (except eax of course) + * - the system call is performed by calling int $0x80 + * - syscall return comes in eax + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + * - the syscall number is always specified last in order to allow to force + * some registers before (gcc refuses a %-register at the last position). 
+ * + * Also, i386 supports the old_select syscall if newselect is not available + */ +#define __ARCH_WANT_SYS_OLD_SELECT + +#define my_syscall0(num) \ +({ \ + long _ret; \ + register long _num asm("eax") = (num); \ + \ + asm volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + long _ret; \ + register long _num asm("eax") = (num); \ + register long _arg1 asm("ebx") = (long)(arg1); \ + \ + asm volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + long _ret; \ + register long _num asm("eax") = (num); \ + register long _arg1 asm("ebx") = (long)(arg1); \ + register long _arg2 asm("ecx") = (long)(arg2); \ + \ + asm volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + long _ret; \ + register long _num asm("eax") = (num); \ + register long _arg1 asm("ebx") = (long)(arg1); \ + register long _arg2 asm("ecx") = (long)(arg2); \ + register long _arg3 asm("edx") = (long)(arg3); \ + \ + asm volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + long _ret; \ + register long _num asm("eax") = (num); \ + register long _arg1 asm("ebx") = (long)(arg1); \ + register long _arg2 asm("ecx") = (long)(arg2); \ + register long _arg3 asm("edx") = (long)(arg3); \ + register long _arg4 asm("esi") = (long)(arg4); \ + \ + asm volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + long _ret; \ + register long _num asm("eax") = (num); \ + register long _arg1 asm("ebx") = (long)(arg1); \ + register long _arg2 asm("ecx") = (long)(arg2); \ + register long _arg3 asm("edx") = (long)(arg3); \ + register long _arg4 asm("esi") = (long)(arg4); \ + register long _arg5 asm("edi") = (long)(arg5); \ + \ + asm volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +/* startup code */ +asm(".section .text\n" + ".global _start\n" + "_start:\n" + "pop %eax\n" // argc (first arg, %eax) + "mov %esp, %ebx\n" // argv[] (second arg, %ebx) + "lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx) + "and $-16, %esp\n" // x86 ABI : esp must be 16-byte aligned when + "push %ecx\n" // push all registers on the stack so that we + "push %ebx\n" // support both regparm and plain stack modes + "push %eax\n" + "call main\n" // main() returns the status code in %eax + "movzbl %al, %ebx\n" // retrieve exit code from lower 8 bits + "movl $1, %eax\n" // NR_exit == 1 + "int $0x80\n" // exit now + "hlt\n" // ensure it does not + ""); + +/* fcntl / open */ +#define O_RDONLY 0 +#define O_WRONLY 1 +#define O_RDWR 2 +#define O_CREAT 0x40 +#define O_EXCL 0x80 +#define O_NOCTTY 0x100 +#define O_TRUNC 0x200 +#define O_APPEND 0x400 +#define O_NONBLOCK 0x800 +#define O_DIRECTORY 0x10000 + +/* The struct returned by the stat() syscall, 32-bit only, the syscall returns + * exactly 56 bytes (stops before the unused array). 
+ */ +struct sys_stat_struct { + unsigned long st_dev; + unsigned long st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; + + unsigned long st_rdev; + unsigned long st_size; + unsigned long st_blksize; + unsigned long st_blocks; + + unsigned long st_atime; + unsigned long st_atime_nsec; + unsigned long st_mtime; + unsigned long st_mtime_nsec; + + unsigned long st_ctime; + unsigned long st_ctime_nsec; + unsigned long __unused[2]; +}; + +#elif defined(__ARM_EABI__) +/* Syscalls for ARM in ARM or Thumb modes : + * - registers are 32-bit + * - stack is 8-byte aligned + * ( http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka4127.html) + * - syscall number is passed in r7 + * - arguments are in r0, r1, r2, r3, r4, r5 + * - the system call is performed by calling svc #0 + * - syscall return comes in r0. + * - only lr is clobbered. + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + * - the syscall number is always specified last in order to allow to force + * some registers before (gcc refuses a %-register at the last position). + * + * Also, ARM supports the old_select syscall if newselect is not available + */ +#define __ARCH_WANT_SYS_OLD_SELECT + +#define my_syscall0(num) \ +({ \ + register long _num asm("r7") = (num); \ + register long _arg1 asm("r0"); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num asm("r7") = (num); \ + register long _arg1 asm("r0") = (long)(arg1); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), \ + "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num asm("r7") = (num); \ + register long _arg1 asm("r0") = (long)(arg1); \ + register long _arg2 asm("r1") = (long)(arg2); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), \ + "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num asm("r7") = (num); \ + register long _arg1 asm("r0") = (long)(arg1); \ + register long _arg2 asm("r1") = (long)(arg2); \ + register long _arg3 asm("r2") = (long)(arg3); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ + "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num asm("r7") = (num); \ + register long _arg1 asm("r0") = (long)(arg1); \ + register long _arg2 asm("r1") = (long)(arg2); \ + register long _arg3 asm("r2") = (long)(arg3); \ + register long _arg4 asm("r3") = (long)(arg4); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num asm("r7") = (num); \ + register long _arg1 asm("r0") = (long)(arg1); \ + register long _arg2 asm("r1") = (long)(arg2); \ + register long _arg3 asm("r2") = (long)(arg3); \ + register long _arg4 asm("r3") = (long)(arg4); \ + register long _arg5 asm("r4") = (long)(arg5); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r" (_arg1) \ + : "r"(_arg1), 
"r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +/* startup code */ +asm(".section .text\n" + ".global _start\n" + "_start:\n" +#if defined(__THUMBEB__) || defined(__THUMBEL__) + /* We enter here in 32-bit mode but if some previous functions were in + * 16-bit mode, the assembler cannot know, so we need to tell it we're in + * 32-bit now, then switch to 16-bit (is there a better way to do it than + * adding 1 by hand ?) and tell the asm we're now in 16-bit mode so that + * it generates correct instructions. Note that we do not support thumb1. + */ + ".code 32\n" + "add r0, pc, #1\n" + "bx r0\n" + ".code 16\n" +#endif + "pop {%r0}\n" // argc was in the stack + "mov %r1, %sp\n" // argv = sp + "add %r2, %r1, %r0, lsl #2\n" // envp = argv + 4*argc ... + "add %r2, %r2, $4\n" // ... + 4 + "and %r3, %r1, $-8\n" // AAPCS : sp must be 8-byte aligned in the + "mov %sp, %r3\n" // callee, an bl doesn't push (lr=pc) + "bl main\n" // main() returns the status code, we'll exit with it. + "and %r0, %r0, $0xff\n" // limit exit code to 8 bits + "movs r7, $1\n" // NR_exit == 1 + "svc $0x00\n" + ""); + +/* fcntl / open */ +#define O_RDONLY 0 +#define O_WRONLY 1 +#define O_RDWR 2 +#define O_CREAT 0x40 +#define O_EXCL 0x80 +#define O_NOCTTY 0x100 +#define O_TRUNC 0x200 +#define O_APPEND 0x400 +#define O_NONBLOCK 0x800 +#define O_DIRECTORY 0x4000 + +/* The struct returned by the stat() syscall, 32-bit only, the syscall returns + * exactly 56 bytes (stops before the unused array). In big endian, the format + * differs as devices are returned as short only. + */ +struct sys_stat_struct { +#if defined(__ARMEB__) + unsigned short st_dev; + unsigned short __pad1; +#else + unsigned long st_dev; +#endif + unsigned long st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; +#if defined(__ARMEB__) + unsigned short st_rdev; + unsigned short __pad2; +#else + unsigned long st_rdev; +#endif + unsigned long st_size; + unsigned long st_blksize; + unsigned long st_blocks; + unsigned long st_atime; + unsigned long st_atime_nsec; + unsigned long st_mtime; + unsigned long st_mtime_nsec; + unsigned long st_ctime; + unsigned long st_ctime_nsec; + unsigned long __unused[2]; +}; + +#elif defined(__aarch64__) +/* Syscalls for AARCH64 : + * - registers are 64-bit + * - stack is 16-byte aligned + * - syscall number is passed in x8 + * - arguments are in x0, x1, x2, x3, x4, x5 + * - the system call is performed by calling svc 0 + * - syscall return comes in x0. + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + * + * On aarch64, select() is not implemented so we have to use pselect6(). 
+ */ +#define __ARCH_WANT_SYS_PSELECT6 + +#define my_syscall0(num) \ +({ \ + register long _num asm("x8") = (num); \ + register long _arg1 asm("x0"); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num asm("x8") = (num); \ + register long _arg1 asm("x0") = (long)(arg1); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num asm("x8") = (num); \ + register long _arg1 asm("x0") = (long)(arg1); \ + register long _arg2 asm("x1") = (long)(arg2); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num asm("x8") = (num); \ + register long _arg1 asm("x0") = (long)(arg1); \ + register long _arg2 asm("x1") = (long)(arg2); \ + register long _arg3 asm("x2") = (long)(arg3); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num asm("x8") = (num); \ + register long _arg1 asm("x0") = (long)(arg1); \ + register long _arg2 asm("x1") = (long)(arg2); \ + register long _arg3 asm("x2") = (long)(arg3); \ + register long _arg4 asm("x3") = (long)(arg4); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num asm("x8") = (num); \ + register long _arg1 asm("x0") = (long)(arg1); \ + register long _arg2 asm("x1") = (long)(arg2); \ + register long _arg3 asm("x2") = (long)(arg3); \ + register long _arg4 asm("x3") = (long)(arg4); \ + register long _arg5 asm("x4") = (long)(arg5); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r" (_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num asm("x8") = (num); \ + register long _arg1 asm("x0") = (long)(arg1); \ + register long _arg2 asm("x1") = (long)(arg2); \ + register long _arg3 asm("x2") = (long)(arg3); \ + register long _arg4 asm("x3") = (long)(arg4); \ + register long _arg5 asm("x4") = (long)(arg5); \ + register long _arg6 asm("x5") = (long)(arg6); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r" (_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_arg6), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +/* startup code */ +asm(".section .text\n" + ".global _start\n" + "_start:\n" + "ldr x0, [sp]\n" // argc (x0) was in the stack + "add x1, sp, 8\n" // argv (x1) = sp + "lsl x2, x0, 3\n" // envp (x2) = 8*argc ... + "add x2, x2, 8\n" // + 8 (skip null) + "add x2, x2, x1\n" // + argv + "and sp, x1, -16\n" // sp must be 16-byte aligned in the callee + "bl main\n" // main() returns the status code, we'll exit with it. 
+ "and x0, x0, 0xff\n" // limit exit code to 8 bits + "mov x8, 93\n" // NR_exit == 93 + "svc #0\n" + ""); + +/* fcntl / open */ +#define O_RDONLY 0 +#define O_WRONLY 1 +#define O_RDWR 2 +#define O_CREAT 0x40 +#define O_EXCL 0x80 +#define O_NOCTTY 0x100 +#define O_TRUNC 0x200 +#define O_APPEND 0x400 +#define O_NONBLOCK 0x800 +#define O_DIRECTORY 0x4000 + +/* The struct returned by the newfstatat() syscall. Differs slightly from the + * x86_64's stat one by field ordering, so be careful. + */ +struct sys_stat_struct { + unsigned long st_dev; + unsigned long st_ino; + unsigned int st_mode; + unsigned int st_nlink; + unsigned int st_uid; + unsigned int st_gid; + + unsigned long st_rdev; + unsigned long __pad1; + long st_size; + int st_blksize; + int __pad2; + + long st_blocks; + long st_atime; + unsigned long st_atime_nsec; + long st_mtime; + + unsigned long st_mtime_nsec; + long st_ctime; + unsigned long st_ctime_nsec; + unsigned int __unused[2]; +}; + +#elif defined(__mips__) && defined(_ABIO32) +/* Syscalls for MIPS ABI O32 : + * - WARNING! there's always a delayed slot! + * - WARNING again, the syntax is different, registers take a '$' and numbers + * do not. + * - registers are 32-bit + * - stack is 8-byte aligned + * - syscall number is passed in v0 (starts at 0xfa0). + * - arguments are in a0, a1, a2, a3, then the stack. The caller needs to + * leave some room in the stack for the callee to save a0..a3 if needed. + * - Many registers are clobbered, in fact only a0..a2 and s0..s8 are + * preserved. See: https://www.linux-mips.org/wiki/Syscall as well as + * scall32-o32.S in the kernel sources. + * - the system call is performed by calling "syscall" + * - syscall return comes in v0, and register a3 needs to be checked to know + * if an error occured, in which case errno is in v0. + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + */ + +#define my_syscall0(num) \ +({ \ + register long _num asm("v0") = (num); \ + register long _arg4 asm("a3"); \ + \ + asm volatile ( \ + "addiu $sp, $sp, -32\n" \ + "syscall\n" \ + "addiu $sp, $sp, 32\n" \ + : "=r"(_num), "=r"(_arg4) \ + : "r"(_num) \ + : "memory", "cc", "at", "v1", "hi", "lo", \ + \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num asm("v0") = (num); \ + register long _arg1 asm("a0") = (long)(arg1); \ + register long _arg4 asm("a3"); \ + \ + asm volatile ( \ + "addiu $sp, $sp, -32\n" \ + "syscall\n" \ + "addiu $sp, $sp, 32\n" \ + : "=r"(_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1) \ + : "memory", "cc", "at", "v1", "hi", "lo", \ + \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num asm("v0") = (num); \ + register long _arg1 asm("a0") = (long)(arg1); \ + register long _arg2 asm("a1") = (long)(arg2); \ + register long _arg4 asm("a3"); \ + \ + asm volatile ( \ + "addiu $sp, $sp, -32\n" \ + "syscall\n" \ + "addiu $sp, $sp, 32\n" \ + : "=r"(_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2) \ + : "memory", "cc", "at", "v1", "hi", "lo", \ + \ + ); \ + _arg4 ? 
-_num : _num; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num asm("v0") = (num); \ + register long _arg1 asm("a0") = (long)(arg1); \ + register long _arg2 asm("a1") = (long)(arg2); \ + register long _arg3 asm("a2") = (long)(arg3); \ + register long _arg4 asm("a3"); \ + \ + asm volatile ( \ + "addiu $sp, $sp, -32\n" \ + "syscall\n" \ + "addiu $sp, $sp, 32\n" \ + : "=r"(_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2), "r"(_arg3) \ + : "memory", "cc", "at", "v1", "hi", "lo", \ + \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num asm("v0") = (num); \ + register long _arg1 asm("a0") = (long)(arg1); \ + register long _arg2 asm("a1") = (long)(arg2); \ + register long _arg3 asm("a2") = (long)(arg3); \ + register long _arg4 asm("a3") = (long)(arg4); \ + \ + asm volatile ( \ + "addiu $sp, $sp, -32\n" \ + "syscall\n" \ + "addiu $sp, $sp, 32\n" \ + : "=r" (_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \ + : "memory", "cc", "at", "v1", "hi", "lo", \ + \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num asm("v0") = (num); \ + register long _arg1 asm("a0") = (long)(arg1); \ + register long _arg2 asm("a1") = (long)(arg2); \ + register long _arg3 asm("a2") = (long)(arg3); \ + register long _arg4 asm("a3") = (long)(arg4); \ + register long _arg5 = (long)(arg5); \ + \ + asm volatile ( \ + "addiu $sp, $sp, -32\n" \ + "sw %7, 16($sp)\n" \ + "syscall\n " \ + "addiu $sp, $sp, 32\n" \ + : "=r" (_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \ + : "memory", "cc", "at", "v1", "hi", "lo", \ + \ + ); \ + _arg4 ? -_num : _num; \ +}) + +/* startup code, note that it's called __start on MIPS */ +asm(".section .text\n" + ".set nomips16\n" + ".global __start\n" + ".set noreorder\n" + ".option pic0\n" + ".ent __start\n" + "__start:\n" + "lw $a0,($sp)\n" // argc was in the stack + "addiu $a1, $sp, 4\n" // argv = sp + 4 + "sll $a2, $a0, 2\n" // a2 = argc * 4 + "add $a2, $a2, $a1\n" // envp = argv + 4*argc ... + "addiu $a2, $a2, 4\n" // ... + 4 + "li $t0, -8\n" + "and $sp, $sp, $t0\n" // sp must be 8-byte aligned + "addiu $sp,$sp,-16\n" // the callee expects to save a0..a3 there! + "jal main\n" // main() returns the status code, we'll exit with it. + "nop\n" // delayed slot + "and $a0, $v0, 0xff\n" // limit exit code to 8 bits + "li $v0, 4001\n" // NR_exit == 4001 + "syscall\n" + ".end __start\n" + ""); + +/* fcntl / open */ +#define O_RDONLY 0 +#define O_WRONLY 1 +#define O_RDWR 2 +#define O_APPEND 0x0008 +#define O_NONBLOCK 0x0080 +#define O_CREAT 0x0100 +#define O_TRUNC 0x0200 +#define O_EXCL 0x0400 +#define O_NOCTTY 0x0800 +#define O_DIRECTORY 0x10000 + +/* The struct returned by the stat() syscall. 88 bytes are returned by the + * syscall. + */ +struct sys_stat_struct { + unsigned int st_dev; + long st_pad1[3]; + unsigned long st_ino; + unsigned int st_mode; + unsigned int st_nlink; + unsigned int st_uid; + unsigned int st_gid; + unsigned int st_rdev; + long st_pad2[2]; + long st_size; + long st_pad3; + long st_atime; + long st_atime_nsec; + long st_mtime; + long st_mtime_nsec; + long st_ctime; + long st_ctime_nsec; + long st_blksize; + long st_blocks; + long st_pad4[14]; +}; + +#endif + + +/* Below are the C functions used to declare the raw syscalls. They try to be + * architecture-agnostic, and return either a success or -errno. 
Declaring them + * static will lead to them being inlined in most cases, but it's still possible + * to reference them by a pointer if needed. + */ +static __attribute__((unused)) +void *sys_brk(void *addr) +{ + return (void *)my_syscall1(__NR_brk, addr); +} + +static __attribute__((noreturn,unused)) +void sys_exit(int status) +{ + my_syscall1(__NR_exit, status & 255); + while(1); // shut the "noreturn" warnings. +} + +static __attribute__((unused)) +int sys_chdir(const char *path) +{ + return my_syscall1(__NR_chdir, path); +} + +static __attribute__((unused)) +int sys_chmod(const char *path, mode_t mode) +{ +#ifdef __NR_fchmodat + return my_syscall4(__NR_fchmodat, AT_FDCWD, path, mode, 0); +#else + return my_syscall2(__NR_chmod, path, mode); +#endif +} + +static __attribute__((unused)) +int sys_chown(const char *path, uid_t owner, gid_t group) +{ +#ifdef __NR_fchownat + return my_syscall5(__NR_fchownat, AT_FDCWD, path, owner, group, 0); +#else + return my_syscall3(__NR_chown, path, owner, group); +#endif +} + +static __attribute__((unused)) +int sys_chroot(const char *path) +{ + return my_syscall1(__NR_chroot, path); +} + +static __attribute__((unused)) +int sys_close(int fd) +{ + return my_syscall1(__NR_close, fd); +} + +static __attribute__((unused)) +int sys_dup(int fd) +{ + return my_syscall1(__NR_dup, fd); +} + +static __attribute__((unused)) +int sys_dup2(int old, int new) +{ + return my_syscall2(__NR_dup2, old, new); +} + +static __attribute__((unused)) +int sys_execve(const char *filename, char *const argv[], char *const envp[]) +{ + return my_syscall3(__NR_execve, filename, argv, envp); +} + +static __attribute__((unused)) +pid_t sys_fork(void) +{ + return my_syscall0(__NR_fork); +} + +static __attribute__((unused)) +int sys_fsync(int fd) +{ + return my_syscall1(__NR_fsync, fd); +} + +static __attribute__((unused)) +int sys_getdents64(int fd, struct linux_dirent64 *dirp, int count) +{ + return my_syscall3(__NR_getdents64, fd, dirp, count); +} + +static __attribute__((unused)) +pid_t sys_getpgrp(void) +{ + return my_syscall0(__NR_getpgrp); +} + +static __attribute__((unused)) +pid_t sys_getpid(void) +{ + return my_syscall0(__NR_getpid); +} + +static __attribute__((unused)) +int sys_gettimeofday(struct timeval *tv, struct timezone *tz) +{ + return my_syscall2(__NR_gettimeofday, tv, tz); +} + +static __attribute__((unused)) +int sys_ioctl(int fd, unsigned long req, void *value) +{ + return my_syscall3(__NR_ioctl, fd, req, value); +} + +static __attribute__((unused)) +int sys_kill(pid_t pid, int signal) +{ + return my_syscall2(__NR_kill, pid, signal); +} + +static __attribute__((unused)) +int sys_link(const char *old, const char *new) +{ +#ifdef __NR_linkat + return my_syscall5(__NR_linkat, AT_FDCWD, old, AT_FDCWD, new, 0); +#else + return my_syscall2(__NR_link, old, new); +#endif +} + +static __attribute__((unused)) +off_t sys_lseek(int fd, off_t offset, int whence) +{ + return my_syscall3(__NR_lseek, fd, offset, whence); +} + +static __attribute__((unused)) +int sys_mkdir(const char *path, mode_t mode) +{ +#ifdef __NR_mkdirat + return my_syscall3(__NR_mkdirat, AT_FDCWD, path, mode); +#else + return my_syscall2(__NR_mkdir, path, mode); +#endif +} + +static __attribute__((unused)) +long sys_mknod(const char *path, mode_t mode, dev_t dev) +{ +#ifdef __NR_mknodat + return my_syscall4(__NR_mknodat, AT_FDCWD, path, mode, dev); +#else + return my_syscall3(__NR_mknod, path, mode, dev); +#endif +} + +static __attribute__((unused)) +int sys_mount(const char *src, const char *tgt, const char 
*fst, + unsigned long flags, const void *data) +{ + return my_syscall5(__NR_mount, src, tgt, fst, flags, data); +} + +static __attribute__((unused)) +int sys_open(const char *path, int flags, mode_t mode) +{ +#ifdef __NR_openat + return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode); +#else + return my_syscall3(__NR_open, path, flags, mode); +#endif +} + +static __attribute__((unused)) +int sys_pivot_root(const char *new, const char *old) +{ + return my_syscall2(__NR_pivot_root, new, old); +} + +static __attribute__((unused)) +int sys_poll(struct pollfd *fds, int nfds, int timeout) +{ + return my_syscall3(__NR_poll, fds, nfds, timeout); +} + +static __attribute__((unused)) +ssize_t sys_read(int fd, void *buf, size_t count) +{ + return my_syscall3(__NR_read, fd, buf, count); +} + +static __attribute__((unused)) +ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg) +{ + return my_syscall4(__NR_reboot, magic1, magic2, cmd, arg); +} + +static __attribute__((unused)) +int sys_sched_yield(void) +{ + return my_syscall0(__NR_sched_yield); +} + +static __attribute__((unused)) +int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout) +{ +#if defined(__ARCH_WANT_SYS_OLD_SELECT) && !defined(__NR__newselect) + struct sel_arg_struct { + unsigned long n; + fd_set *r, *w, *e; + struct timeval *t; + } arg = { .n = nfds, .r = rfds, .w = wfds, .e = efds, .t = timeout }; + return my_syscall1(__NR_select, &arg); +#elif defined(__ARCH_WANT_SYS_PSELECT6) && defined(__NR_pselect6) + struct timespec t; + + if (timeout) { + t.tv_sec = timeout->tv_sec; + t.tv_nsec = timeout->tv_usec * 1000; + } + return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL); +#else +#ifndef __NR__newselect +#define __NR__newselect __NR_select +#endif + return my_syscall5(__NR__newselect, nfds, rfds, wfds, efds, timeout); +#endif +} + +static __attribute__((unused)) +int sys_setpgid(pid_t pid, pid_t pgid) +{ + return my_syscall2(__NR_setpgid, pid, pgid); +} + +static __attribute__((unused)) +pid_t sys_setsid(void) +{ + return my_syscall0(__NR_setsid); +} + +static __attribute__((unused)) +int sys_stat(const char *path, struct stat *buf) +{ + struct sys_stat_struct stat; + long ret; + +#ifdef __NR_newfstatat + /* only solution for arm64 */ + ret = my_syscall4(__NR_newfstatat, AT_FDCWD, path, &stat, 0); +#else + ret = my_syscall2(__NR_stat, path, &stat); +#endif + buf->st_dev = stat.st_dev; + buf->st_ino = stat.st_ino; + buf->st_mode = stat.st_mode; + buf->st_nlink = stat.st_nlink; + buf->st_uid = stat.st_uid; + buf->st_gid = stat.st_gid; + buf->st_rdev = stat.st_rdev; + buf->st_size = stat.st_size; + buf->st_blksize = stat.st_blksize; + buf->st_blocks = stat.st_blocks; + buf->st_atime = stat.st_atime; + buf->st_mtime = stat.st_mtime; + buf->st_ctime = stat.st_ctime; + return ret; +} + + +static __attribute__((unused)) +int sys_symlink(const char *old, const char *new) +{ +#ifdef __NR_symlinkat + return my_syscall3(__NR_symlinkat, old, AT_FDCWD, new); +#else + return my_syscall2(__NR_symlink, old, new); +#endif +} + +static __attribute__((unused)) +mode_t sys_umask(mode_t mode) +{ + return my_syscall1(__NR_umask, mode); +} + +static __attribute__((unused)) +int sys_umount2(const char *path, int flags) +{ + return my_syscall2(__NR_umount2, path, flags); +} + +static __attribute__((unused)) +int sys_unlink(const char *path) +{ +#ifdef __NR_unlinkat + return my_syscall3(__NR_unlinkat, AT_FDCWD, path, 0); +#else + return my_syscall1(__NR_unlink, path); +#endif +} + 
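The split visible here is deliberate: the raw sys_*() helpers above return a negative errno value straight from the kernel, while the libc-compatible wrappers defined further down translate that into the usual -1 return with errno set. Below is a minimal sketch of how a program built against this header might use the two layers; the file name probe.c and the target path /dev/null are illustrative assumptions, not taken from the patch.

    /* probe.c - illustrative only, not part of nolibc.h.  Build roughly as
     * the header comment suggests:
     *   gcc -nostdlib -static -include nolibc.h -lgcc -Os -o probe probe.c
     */
    int main(void)
    {
            int fd;

            fd = sys_open("/dev/null", O_RDWR, 0);
            if (fd < 0)             /* raw layer: fd holds -errno */
                    return 1;
            sys_close(fd);

            fd = open("/dev/null", O_RDWR, 0);
            if (fd == -1)           /* libc layer: the code is in errno */
                    return 1;
            close(fd);
            return 0;
    }
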
+static __attribute__((unused)) +pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage) +{ + return my_syscall4(__NR_wait4, pid, status, options, rusage); +} + +static __attribute__((unused)) +pid_t sys_waitpid(pid_t pid, int *status, int options) +{ + return sys_wait4(pid, status, options, 0); +} + +static __attribute__((unused)) +pid_t sys_wait(int *status) +{ + return sys_waitpid(-1, status, 0); +} + +static __attribute__((unused)) +ssize_t sys_write(int fd, const void *buf, size_t count) +{ + return my_syscall3(__NR_write, fd, buf, count); +} + + +/* Below are the libc-compatible syscalls which return x or -1 and set errno. + * They rely on the functions above. Similarly they're marked static so that it + * is possible to assign pointers to them if needed. + */ + +static __attribute__((unused)) +int brk(void *addr) +{ + void *ret = sys_brk(addr); + + if (!ret) { + SET_ERRNO(ENOMEM); + return -1; + } + return 0; +} + +static __attribute__((noreturn,unused)) +void exit(int status) +{ + sys_exit(status); +} + +static __attribute__((unused)) +int chdir(const char *path) +{ + int ret = sys_chdir(path); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int chmod(const char *path, mode_t mode) +{ + int ret = sys_chmod(path, mode); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int chown(const char *path, uid_t owner, gid_t group) +{ + int ret = sys_chown(path, owner, group); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int chroot(const char *path) +{ + int ret = sys_chroot(path); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int close(int fd) +{ + int ret = sys_close(fd); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int dup2(int old, int new) +{ + int ret = sys_dup2(old, new); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int execve(const char *filename, char *const argv[], char *const envp[]) +{ + int ret = sys_execve(filename, argv, envp); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +pid_t fork(void) +{ + pid_t ret = sys_fork(); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int fsync(int fd) +{ + int ret = sys_fsync(fd); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int getdents64(int fd, struct linux_dirent64 *dirp, int count) +{ + int ret = sys_getdents64(fd, dirp, count); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +pid_t getpgrp(void) +{ + pid_t ret = sys_getpgrp(); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +pid_t getpid(void) +{ + pid_t ret = sys_getpid(); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int gettimeofday(struct timeval *tv, struct timezone *tz) +{ + int ret = sys_gettimeofday(tv, tz); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int ioctl(int fd, unsigned long req, void *value) +{ + int ret = sys_ioctl(fd, req, value); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return 
ret; +} + +static __attribute__((unused)) +int kill(pid_t pid, int signal) +{ + int ret = sys_kill(pid, signal); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int link(const char *old, const char *new) +{ + int ret = sys_link(old, new); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +off_t lseek(int fd, off_t offset, int whence) +{ + off_t ret = sys_lseek(fd, offset, whence); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int mkdir(const char *path, mode_t mode) +{ + int ret = sys_mkdir(path, mode); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int mknod(const char *path, mode_t mode, dev_t dev) +{ + int ret = sys_mknod(path, mode, dev); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int mount(const char *src, const char *tgt, + const char *fst, unsigned long flags, + const void *data) +{ + int ret = sys_mount(src, tgt, fst, flags, data); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int open(const char *path, int flags, mode_t mode) +{ + int ret = sys_open(path, flags, mode); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int pivot_root(const char *new, const char *old) +{ + int ret = sys_pivot_root(new, old); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int poll(struct pollfd *fds, int nfds, int timeout) +{ + int ret = sys_poll(fds, nfds, timeout); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +ssize_t read(int fd, void *buf, size_t count) +{ + ssize_t ret = sys_read(fd, buf, count); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int reboot(int cmd) +{ + int ret = sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +void *sbrk(intptr_t inc) +{ + void *ret; + + /* first call to find current end */ + if ((ret = sys_brk(0)) && (sys_brk(ret + inc) == ret + inc)) + return ret + inc; + + SET_ERRNO(ENOMEM); + return (void *)-1; +} + +static __attribute__((unused)) +int sched_yield(void) +{ + int ret = sys_sched_yield(); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout) +{ + int ret = sys_select(nfds, rfds, wfds, efds, timeout); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int setpgid(pid_t pid, pid_t pgid) +{ + int ret = sys_setpgid(pid, pgid); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +pid_t setsid(void) +{ + pid_t ret = sys_setsid(); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +unsigned int sleep(unsigned int seconds) +{ + struct timeval my_timeval = { seconds, 0 }; + + if (sys_select(0, 0, 0, 0, &my_timeval) < 0) + return my_timeval.tv_sec + !!my_timeval.tv_usec; + else + return 0; +} + +static __attribute__((unused)) +int stat(const char *path, struct stat *buf) +{ + int 
ret = sys_stat(path, buf); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int symlink(const char *old, const char *new) +{ + int ret = sys_symlink(old, new); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int tcsetpgrp(int fd, pid_t pid) +{ + return ioctl(fd, TIOCSPGRP, &pid); +} + +static __attribute__((unused)) +mode_t umask(mode_t mode) +{ + return sys_umask(mode); +} + +static __attribute__((unused)) +int umount2(const char *path, int flags) +{ + int ret = sys_umount2(path, flags); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int unlink(const char *path) +{ + int ret = sys_unlink(path); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage) +{ + pid_t ret = sys_wait4(pid, status, options, rusage); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +pid_t waitpid(pid_t pid, int *status, int options) +{ + pid_t ret = sys_waitpid(pid, status, options); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +pid_t wait(int *status) +{ + pid_t ret = sys_wait(status); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +ssize_t write(int fd, const void *buf, size_t count) +{ + ssize_t ret = sys_write(fd, buf, count); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +/* some size-optimized reimplementations of a few common str* and mem* + * functions. They're marked static, except memcpy() and raise() which are used + * by libgcc on ARM, so they are marked weak instead in order not to cause an + * error when building a program made of multiple files (not recommended). + */ + +static __attribute__((unused)) +void *memmove(void *dst, const void *src, size_t len) +{ + ssize_t pos = (dst <= src) ? -1 : (long)len; + void *ret = dst; + + while (len--) { + pos += (dst <= src) ? 1 : -1; + ((char *)dst)[pos] = ((char *)src)[pos]; + } + return ret; +} + +static __attribute__((unused)) +void *memset(void *dst, int b, size_t len) +{ + char *p = dst; + + while (len--) + *(p++) = b; + return dst; +} + +static __attribute__((unused)) +int memcmp(const void *s1, const void *s2, size_t n) +{ + size_t ofs = 0; + char c1 = 0; + + while (ofs < n && !(c1 = ((char *)s1)[ofs] - ((char *)s2)[ofs])) { + ofs++; + } + return c1; +} + +static __attribute__((unused)) +char *strcpy(char *dst, const char *src) +{ + char *ret = dst; + + while ((*dst++ = *src++)); + return ret; +} + +static __attribute__((unused)) +char *strchr(const char *s, int c) +{ + while (*s) { + if (*s == (char)c) + return (char *)s; + s++; + } + return NULL; +} + +static __attribute__((unused)) +char *strrchr(const char *s, int c) +{ + const char *ret = NULL; + + while (*s) { + if (*s == (char)c) + ret = s; + s++; + } + return (char *)ret; +} + +static __attribute__((unused)) +size_t nolibc_strlen(const char *str) +{ + size_t len; + + for (len = 0; str[len]; len++); + return len; +} + +#define strlen(str) ({ \ + __builtin_constant_p((str)) ? 
\ + __builtin_strlen((str)) : \ + nolibc_strlen((str)); \ +}) + +static __attribute__((unused)) +int isdigit(int c) +{ + return (unsigned int)(c - '0') <= 9; +} + +static __attribute__((unused)) +long atol(const char *s) +{ + unsigned long ret = 0; + unsigned long d; + int neg = 0; + + if (*s == '-') { + neg = 1; + s++; + } + + while (1) { + d = (*s++) - '0'; + if (d > 9) + break; + ret *= 10; + ret += d; + } + + return neg ? -ret : ret; +} + +static __attribute__((unused)) +int atoi(const char *s) +{ + return atol(s); +} + +static __attribute__((unused)) +const char *ltoa(long in) +{ + /* large enough for -9223372036854775808 */ + static char buffer[21]; + char *pos = buffer + sizeof(buffer) - 1; + int neg = in < 0; + unsigned long n = neg ? -in : in; + + *pos-- = '\0'; + do { + *pos-- = '0' + n % 10; + n /= 10; + if (pos < buffer) + return pos + 1; + } while (n); + + if (neg) + *pos-- = '-'; + return pos + 1; +} + +__attribute__((weak,unused)) +void *memcpy(void *dst, const void *src, size_t len) +{ + return memmove(dst, src, len); +} + +/* needed by libgcc for divide by zero */ +__attribute__((weak,unused)) +int raise(int signal) +{ + return kill(getpid(), signal); +} + +/* Here come a few helper functions */ + +static __attribute__((unused)) +void FD_ZERO(fd_set *set) +{ + memset(set, 0, sizeof(*set)); +} + +static __attribute__((unused)) +void FD_SET(int fd, fd_set *set) +{ + if (fd < 0 || fd >= FD_SETSIZE) + return; + set->fd32[fd / 32] |= 1 << (fd & 31); +} + +/* WARNING, it only deals with the 4096 first majors and 256 first minors */ +static __attribute__((unused)) +dev_t makedev(unsigned int major, unsigned int minor) +{ + return ((major & 0xfff) << 8) | (minor & 0xff); +} diff --git a/tools/testing/selftests/rcutorture/doc/initrd.txt b/tools/testing/selftests/rcutorture/doc/initrd.txt index 833f826d6ec2..933b4fd12327 100644 --- a/tools/testing/selftests/rcutorture/doc/initrd.txt +++ b/tools/testing/selftests/rcutorture/doc/initrd.txt @@ -1,9 +1,12 @@ -This document describes one way to create the initrd directory hierarchy -in order to allow an initrd to be built into your kernel. The trick -here is to steal the initrd file used on your Linux laptop, Ubuntu in -this case. There are probably much better ways of doing this. +The rcutorture scripting tools automatically create the needed initrd +directory using dracut. Failing that, this tool will create an initrd +containing a single statically linked binary named "init" that loops +over a very long sleep() call. In both cases, this creation is done +by tools/testing/selftests/rcutorture/bin/mkinitrd.sh. -That said, here are the commands: +However, if you are attempting to run rcutorture on a system that does +not have dracut installed, and if you don't like the notion of static +linking, you might wish to press an existing initrd into service: ------------------------------------------------------------------------ cd tools/testing/selftests/rcutorture @@ -11,22 +14,7 @@ zcat /initrd.img > /tmp/initrd.img.zcat mkdir initrd cd initrd cpio -id < /tmp/initrd.img.zcat ------------------------------------------------------------------------- - -Another way to create an initramfs image is using "dracut"[1], which is -available on many distros, however the initramfs dracut generates is a cpio -archive with another cpio archive in it, so an extra step is needed to create -the initrd directory hierarchy. 
- -Here are the commands to create a initrd directory for rcutorture using -dracut: - ------------------------------------------------------------------------- -dracut --no-hostonly --no-hostonly-cmdline --module "base bash shutdown" /tmp/initramfs.img -cd tools/testing/selftests/rcutorture -mkdir initrd -cd initrd -/usr/lib/dracut/skipcpio /tmp/initramfs.img | zcat | cpio -id < /tmp/initramfs.img +# Manually verify that initrd contains needed binaries and libraries. ------------------------------------------------------------------------ Interestingly enough, if you are running rcutorture, you don't really @@ -39,75 +27,12 @@ with 0755 mode. ------------------------------------------------------------------------ #!/bin/sh -[ -d /dev ] || mkdir -m 0755 /dev -[ -d /root ] || mkdir -m 0700 /root -[ -d /sys ] || mkdir /sys -[ -d /proc ] || mkdir /proc -[ -d /tmp ] || mkdir /tmp -mkdir -p /var/lock -mount -t sysfs -o nodev,noexec,nosuid sysfs /sys -mount -t proc -o nodev,noexec,nosuid proc /proc -# Some things don't work properly without /etc/mtab. -ln -sf /proc/mounts /etc/mtab - -# Note that this only becomes /dev on the real filesystem if udev's scripts -# are used; which they will be, but it's worth pointing out -if ! mount -t devtmpfs -o mode=0755 udev /dev; then - echo "W: devtmpfs not available, falling back to tmpfs for /dev" - mount -t tmpfs -o mode=0755 udev /dev - [ -e /dev/console ] || mknod --mode=600 /dev/console c 5 1 - [ -e /dev/kmsg ] || mknod --mode=644 /dev/kmsg c 1 11 - [ -e /dev/null ] || mknod --mode=666 /dev/null c 1 3 -fi - -mkdir /dev/pts -mount -t devpts -o noexec,nosuid,gid=5,mode=0620 devpts /dev/pts || true -mount -t tmpfs -o "nosuid,size=20%,mode=0755" tmpfs /run -mkdir /run/initramfs -# compatibility symlink for the pre-oneiric locations -ln -s /run/initramfs /dev/.initramfs - -# Export relevant variables -export ROOT= -export ROOTDELAY= -export ROOTFLAGS= -export ROOTFSTYPE= -export IP= -export BOOT= -export BOOTIF= -export UBIMTD= -export break= -export init=/sbin/init -export quiet=n -export readonly=y -export rootmnt=/root -export debug= -export panic= -export blacklist= -export resume= -export resume_offset= -export recovery= - -for i in /sys/devices/system/cpu/cpu*/online -do - case $i in - '/sys/devices/system/cpu/cpu0/online') - ;; - '/sys/devices/system/cpu/cpu*/online') - ;; - *) - echo 1 > $i - ;; - esac -done - while : do sleep 10 done ------------------------------------------------------------------------ -References: -[1]: https://dracut.wiki.kernel.org/index.php/Main_Page -[2]: http://blog.elastocloud.org/2015/06/rapid-linux-kernel-devtest-with-qemu.html -[3]: https://www.centos.org/forums/viewtopic.php?t=51621 +This approach also allows most of the binaries and libraries in the +initrd filesystem to be dispensed with, which can save significant +space in rcutorture's "res" directory. diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h index 891ad13e95b2..d27285f8ee82 100644 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h +++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h @@ -131,8 +131,8 @@ struct hlist_node { * weird ABI and we need to ask it explicitly. * * The alignment is required to guarantee that bits 0 and 1 of @next will be - * clear under normal conditions -- as long as we use call_rcu(), - * call_rcu_bh(), call_rcu_sched(), or call_srcu() to queue callback. 
+ * clear under normal conditions -- as long as we use call_rcu() or + * call_srcu() to queue callback. * * This guarantee is important for few reasons: * - future call_rcu_lazy() will make use of lower bits in the pointer; diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 7cfdfbc910e0..50e25438fb3c 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -196,7 +196,7 @@ void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active) */ static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq) { - DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock)); + lockdep_assert_held(&irq->irq_lock); /* If the interrupt is active, it must stay on the current vcpu */ if (irq->active) @@ -273,7 +273,7 @@ static void vgic_sort_ap_list(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock)); + lockdep_assert_held(&vgic_cpu->ap_list_lock); list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp); } @@ -311,7 +311,7 @@ bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, { struct kvm_vcpu *vcpu; - DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock)); + lockdep_assert_held(&irq->irq_lock); retry: vcpu = vgic_target_oracle(irq); @@ -702,7 +702,7 @@ static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu) static inline void vgic_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) { - DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock)); + lockdep_assert_held(&irq->irq_lock); if (kvm_vgic_global_state.type == VGIC_V2) vgic_v2_populate_lr(vcpu, irq, lr); @@ -736,7 +736,7 @@ static int compute_ap_list_depth(struct kvm_vcpu *vcpu, *multi_sgi = false; - DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock)); + lockdep_assert_held(&vgic_cpu->ap_list_lock); list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { int w; @@ -761,7 +761,7 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) bool multi_sgi; u8 prio = 0xff; - DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock)); + lockdep_assert_held(&vgic_cpu->ap_list_lock); count = compute_ap_list_depth(vcpu, &multi_sgi); if (count > kvm_vgic_global_state.nr_lr || multi_sgi) |
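The vgic.c hunks above replace open-coded DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(...)) checks with lockdep_assert_held(), which verifies that the current context actually holds the lock (spin_is_locked() only reports that somebody does) and compiles away when CONFIG_LOCKDEP is disabled. A hedged sketch of the same pattern, using hypothetical names that are not taken from the patch:

	#include <linux/lockdep.h>
	#include <linux/spinlock.h>

	struct demo_dev {
		spinlock_t lock;
		unsigned int count;
	};

	/* Must be called with dev->lock held; lockdep checks this when
	 * CONFIG_LOCKDEP is enabled and costs nothing otherwise. */
	static void demo_update_locked(struct demo_dev *dev)
	{
		lockdep_assert_held(&dev->lock);
		dev->count++;
	}

	static void demo_update(struct demo_dev *dev)
	{
		spin_lock(&dev->lock);
		demo_update_locked(dev);
		spin_unlock(&dev->lock);
	}
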