72 files changed, 2989 insertions, 690 deletions
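Two mechanical conversions account for most of the hunks below: the RCU flavor consolidation, under which synchronize_sched(), synchronize_rcu_bh(), call_rcu_sched() and rcu_barrier_sched() all become synchronize_rcu(), call_rcu() and rcu_barrier(), and the replacement of open-coded spin_is_locked() assertions with lockdep_assert_held(). A minimal caller-side sketch of both conversions follows; it is illustrative only and not part of this series, and the rmv_example structure, its fields and the example_lock spinlock are hypothetical names.

#include <linux/kernel.h>
#include <linux/lockdep.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct rmv_example {			/* hypothetical structure, for illustration only */
	struct rcu_head rcu;
	int payload;
};

static DEFINE_SPINLOCK(example_lock);	/* hypothetical lock protecting removal */

static void rmv_example_free(struct rcu_head *head)
{
	kfree(container_of(head, struct rmv_example, rcu));
}

/* Caller must hold example_lock. */
static void rmv_example_retire(struct rmv_example *p)
{
	/*
	 * Older code asserted the lock with WARN_ON_SMP(!spin_is_locked(...)),
	 * which only proves that somebody holds it.  lockdep_assert_held()
	 * checks that the current context holds it.
	 */
	lockdep_assert_held(&example_lock);

	/*
	 * Before the flavor consolidation this would have been
	 * call_rcu_sched(&p->rcu, rmv_example_free).  With the flavors
	 * merged, plain call_rcu() also waits for preempt-disabled,
	 * bh-disabled and irq-disabled readers.
	 */
	call_rcu(&p->rcu, rmv_example_free);
}

static void rmv_example_quiesce(void)
{
	/* synchronize_sched() and synchronize_rcu_bh() both collapse into this. */
	synchronize_rcu();
}

After the consolidation a single grace period covers preempt-disabled, bottom-half and ordinary RCU readers, which is why the per-flavor calls can simply be dropped rather than translated one for one.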
diff --git a/MAINTAINERS b/MAINTAINERS index f4855974f325..188809580d5e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4033,7 +4033,7 @@ S: Supported F: drivers/net/ethernet/chelsio/cxgb4vf/ CXL (IBM Coherent Accelerator Processor Interface CAPI) DRIVER -M: Frederic Barrat <fbarrat@linux.vnet.ibm.com> +M: Frederic Barrat <fbarrat@linux.ibm.com> M: Andrew Donnellan <andrew.donnellan@au1.ibm.com> L: linuxppc-dev@lists.ozlabs.org S: Supported @@ -4045,9 +4045,9 @@ F: Documentation/powerpc/cxl.txt F: Documentation/ABI/testing/sysfs-class-cxl CXLFLASH (IBM Coherent Accelerator Processor Interface CAPI Flash) SCSI DRIVER -M: Manoj N. Kumar <manoj@linux.vnet.ibm.com> -M: Matthew R. Ochs <mrochs@linux.vnet.ibm.com> -M: Uma Krishnan <ukrishn@linux.vnet.ibm.com> +M: Manoj N. Kumar <manoj@linux.ibm.com> +M: Matthew R. Ochs <mrochs@linux.ibm.com> +M: Uma Krishnan <ukrishn@linux.ibm.com> L: linux-scsi@vger.kernel.org S: Supported F: drivers/scsi/cxlflash/ @@ -5428,7 +5428,7 @@ S: Orphan F: fs/efs/ EHEA (IBM pSeries eHEA 10Gb ethernet adapter) DRIVER -M: Douglas Miller <dougmill@linux.vnet.ibm.com> +M: Douglas Miller <dougmill@linux.ibm.com> L: netdev@vger.kernel.org S: Maintained F: drivers/net/ethernet/ibm/ehea/ @@ -5565,7 +5565,7 @@ F: Documentation/filesystems/ext4/ext4.rst F: fs/ext4/ Extended Verification Module (EVM) -M: Mimi Zohar <zohar@linux.vnet.ibm.com> +M: Mimi Zohar <zohar@linux.ibm.com> L: linux-integrity@vger.kernel.org S: Supported F: security/integrity/evm/ @@ -5775,7 +5775,7 @@ F: include/linux/firmware.h FLASH ADAPTER DRIVER (IBM Flash Adapter 900GB Full Height PCI Flash Card) M: Joshua Morris <josh.h.morris@us.ibm.com> -M: Philip Kelleher <pjk1939@linux.vnet.ibm.com> +M: Philip Kelleher <pjk1939@linux.ibm.com> S: Maintained F: drivers/block/rsxx/ @@ -6042,7 +6042,7 @@ F: include/linux/fscrypt*.h F: Documentation/filesystems/fscrypt.rst FSI-ATTACHED I2C DRIVER -M: Eddie James <eajames@linux.vnet.ibm.com> +M: Eddie James <eajames@linux.ibm.com> L: linux-i2c@vger.kernel.org L: openbmc@lists.ozlabs.org (moderated for non-subscribers) S: Maintained @@ -6218,8 +6218,7 @@ S: Supported F: drivers/uio/uio_pci_generic.c GENWQE (IBM Generic Workqueue Card) -M: Frank Haverkamp <haver@linux.vnet.ibm.com> -M: Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com> +M: Frank Haverkamp <haver@linux.ibm.com> S: Supported F: drivers/misc/genwqe/ @@ -7001,8 +7000,7 @@ F: crypto/842.c F: lib/842/ IBM Power in-Nest Crypto Acceleration -M: Leonidas S. 
Barbosa <leosilva@linux.vnet.ibm.com> -M: Paulo Flabiano Smorigo <pfsmorigo@linux.vnet.ibm.com> +M: Paulo Flabiano Smorigo <pfsmorigo@linux.ibm.com> L: linux-crypto@vger.kernel.org S: Supported F: drivers/crypto/nx/Makefile @@ -7019,8 +7017,8 @@ S: Supported F: drivers/scsi/ipr.* IBM Power SRIOV Virtual NIC Device Driver -M: Thomas Falcon <tlfalcon@linux.vnet.ibm.com> -M: John Allen <jallen@linux.vnet.ibm.com> +M: Thomas Falcon <tlfalcon@linux.ibm.com> +M: John Allen <jallen@linux.ibm.com> L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/ibm/ibmvnic.* @@ -7035,41 +7033,38 @@ F: arch/powerpc/include/asm/vas.h F: arch/powerpc/include/uapi/asm/vas.h IBM Power Virtual Ethernet Device Driver -M: Thomas Falcon <tlfalcon@linux.vnet.ibm.com> +M: Thomas Falcon <tlfalcon@linux.ibm.com> L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/ibm/ibmveth.* IBM Power Virtual FC Device Drivers -M: Tyrel Datwyler <tyreld@linux.vnet.ibm.com> +M: Tyrel Datwyler <tyreld@linux.ibm.com> L: linux-scsi@vger.kernel.org S: Supported F: drivers/scsi/ibmvscsi/ibmvfc* IBM Power Virtual Management Channel Driver -M: Bryant G. Ly <bryantly@linux.vnet.ibm.com> -M: Steven Royer <seroyer@linux.vnet.ibm.com> +M: Steven Royer <seroyer@linux.ibm.com> S: Supported F: drivers/misc/ibmvmc.* IBM Power Virtual SCSI Device Drivers -M: Tyrel Datwyler <tyreld@linux.vnet.ibm.com> +M: Tyrel Datwyler <tyreld@linux.ibm.com> L: linux-scsi@vger.kernel.org S: Supported F: drivers/scsi/ibmvscsi/ibmvscsi* F: include/scsi/viosrp.h IBM Power Virtual SCSI Device Target Driver -M: Bryant G. Ly <bryantly@linux.vnet.ibm.com> -M: Michael Cyr <mikecyr@linux.vnet.ibm.com> +M: Michael Cyr <mikecyr@linux.ibm.com> L: linux-scsi@vger.kernel.org L: target-devel@vger.kernel.org S: Supported F: drivers/scsi/ibmvscsi_tgt/ IBM Power VMX Cryptographic instructions -M: Leonidas S. Barbosa <leosilva@linux.vnet.ibm.com> -M: Paulo Flabiano Smorigo <pfsmorigo@linux.vnet.ibm.com> +M: Paulo Flabiano Smorigo <pfsmorigo@linux.ibm.com> L: linux-crypto@vger.kernel.org S: Supported F: drivers/crypto/vmx/Makefile @@ -7346,7 +7341,7 @@ S: Maintained L: linux-crypto@vger.kernel.org INTEGRITY MEASUREMENT ARCHITECTURE (IMA) -M: Mimi Zohar <zohar@linux.vnet.ibm.com> +M: Mimi Zohar <zohar@linux.ibm.com> M: Dmitry Kasatkin <dmitry.kasatkin@gmail.com> L: linux-integrity@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity.git @@ -7938,9 +7933,8 @@ S: Maintained F: drivers/media/platform/rcar_jpu.c JSM Neo PCI based serial card -M: Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com> L: linux-serial@vger.kernel.org -S: Maintained +S: Orphan F: drivers/tty/serial/jsm/ K10TEMP HARDWARE MONITORING DRIVER @@ -8170,7 +8164,7 @@ F: include/uapi/linux/kexec.h F: kernel/kexec* KEYS-ENCRYPTED -M: Mimi Zohar <zohar@linux.vnet.ibm.com> +M: Mimi Zohar <zohar@linux.ibm.com> L: linux-integrity@vger.kernel.org L: keyrings@vger.kernel.org S: Supported @@ -8179,9 +8173,9 @@ F: include/keys/encrypted-type.h F: security/keys/encrypted-keys/ KEYS-TRUSTED -M: James Bottomley <jejb@linux.vnet.ibm.com> +M: James Bottomley <jejb@linux.ibm.com> M: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> -M: Mimi Zohar <zohar@linux.vnet.ibm.com> +M: Mimi Zohar <zohar@linux.ibm.com> L: linux-integrity@vger.kernel.org L: keyrings@vger.kernel.org S: Supported @@ -8234,7 +8228,7 @@ F: lib/test_kmod.c F: tools/testing/selftests/kmod/ KPROBES -M: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> +M: Naveen N. 
Rao <naveen.n.rao@linux.ibm.com> M: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> M: "David S. Miller" <davem@davemloft.net> M: Masami Hiramatsu <mhiramat@kernel.org> @@ -8590,7 +8584,7 @@ M: Nicholas Piggin <npiggin@gmail.com> M: David Howells <dhowells@redhat.com> M: Jade Alglave <j.alglave@ucl.ac.uk> M: Luc Maranget <luc.maranget@inria.fr> -M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> +M: "Paul E. McKenney" <paulmck@linux.ibm.com> R: Akira Yokosawa <akiyks@gmail.com> R: Daniel Lustig <dlustig@nvidia.com> L: linux-kernel@vger.kernel.org @@ -9548,7 +9542,7 @@ F: drivers/platform/x86/mlx-platform.c MEMBARRIER SUPPORT M: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> -M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> +M: "Paul E. McKenney" <paulmck@linux.ibm.com> L: linux-kernel@vger.kernel.org S: Supported F: kernel/sched/membarrier.c @@ -10686,7 +10680,7 @@ S: Supported F: tools/objtool/ OCXL (Open Coherent Accelerator Processor Interface OpenCAPI) DRIVER -M: Frederic Barrat <fbarrat@linux.vnet.ibm.com> +M: Frederic Barrat <fbarrat@linux.ibm.com> M: Andrew Donnellan <andrew.donnellan@au1.ibm.com> L: linuxppc-dev@lists.ozlabs.org S: Supported @@ -12487,7 +12481,7 @@ S: Orphan F: drivers/net/wireless/ray* RCUTORTURE TEST FRAMEWORK -M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> +M: "Paul E. McKenney" <paulmck@linux.ibm.com> M: Josh Triplett <josh@joshtriplett.org> R: Steven Rostedt <rostedt@goodmis.org> R: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> @@ -12534,11 +12528,12 @@ F: arch/x86/include/asm/intel_rdt_sched.h F: Documentation/x86/intel_rdt* READ-COPY UPDATE (RCU) -M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> +M: "Paul E. McKenney" <paulmck@linux.ibm.com> M: Josh Triplett <josh@joshtriplett.org> R: Steven Rostedt <rostedt@goodmis.org> R: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> R: Lai Jiangshan <jiangshanlai@gmail.com> +R: Joel Fernandes <joel@joelfernandes.org> L: linux-kernel@vger.kernel.org W: http://www.rdrop.com/users/paulmck/RCU/ S: Supported @@ -12674,7 +12669,7 @@ F: include/linux/reset-controller.h RESTARTABLE SEQUENCES SUPPORT M: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> M: Peter Zijlstra <peterz@infradead.org> -M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> +M: "Paul E. McKenney" <paulmck@linux.ibm.com> M: Boqun Feng <boqun.feng@gmail.com> L: linux-kernel@vger.kernel.org S: Supported @@ -13199,7 +13194,7 @@ F: drivers/scsi/sg.c F: include/scsi/sg.h SCSI SUBSYSTEM -M: "James E.J. Bottomley" <jejb@linux.vnet.ibm.com> +M: "James E.J. Bottomley" <jejb@linux.ibm.com> T: git git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi.git M: "Martin K. Petersen" <martin.petersen@oracle.com> T: git git://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git @@ -13634,7 +13629,7 @@ F: mm/sl?b* SLEEPABLE READ-COPY UPDATE (SRCU) M: Lai Jiangshan <jiangshanlai@gmail.com> -M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> +M: "Paul E. McKenney" <paulmck@linux.ibm.com> M: Josh Triplett <josh@joshtriplett.org> R: Steven Rostedt <rostedt@goodmis.org> R: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> @@ -15057,7 +15052,7 @@ F: drivers/platform/x86/topstar-laptop.c TORTURE-TEST MODULES M: Davidlohr Bueso <dave@stgolabs.net> -M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> +M: "Paul E. 
McKenney" <paulmck@linux.ibm.com> M: Josh Triplett <josh@joshtriplett.org> L: linux-kernel@vger.kernel.org S: Supported diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 8cf035e68378..4c01e9a01a74 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -289,7 +289,7 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte) (*batchp)->ptes[(*batchp)->index++] = hugepte; if ((*batchp)->index == HUGEPD_FREELIST_SIZE) { - call_rcu_sched(&(*batchp)->rcu, hugepd_free_rcu_callback); + call_rcu(&(*batchp)->rcu, hugepd_free_rcu_callback); *batchp = NULL; } put_cpu_var(hugepd_freelist_cur); diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 76d89ee8b428..da64e4b9324e 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -350,7 +350,7 @@ void tlb_table_flush(struct mmu_gather *tlb) struct mmu_table_batch **batch = &tlb->batch; if (*batch) { - call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); + call_rcu(&(*batch)->rcu, tlb_remove_table_rcu); *batch = NULL; } } diff --git a/arch/sparc/oprofile/init.c b/arch/sparc/oprofile/init.c index f9024bccff16..43730c9b1c86 100644 --- a/arch/sparc/oprofile/init.c +++ b/arch/sparc/oprofile/init.c @@ -53,7 +53,7 @@ static void timer_stop(void) { nmi_adjust_hz(1); unregister_die_notifier(&profile_timer_exceptions_nb); - synchronize_sched(); /* Allow already-started NMIs to complete. */ + synchronize_rcu(); /* Allow already-started NMIs to complete. */ } static int op_nmi_timer_init(struct oprofile_operations *ops) diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 8cd66152cdb0..9df652d3d927 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -59,7 +59,7 @@ static struct pcibios_fwaddrmap *pcibios_fwaddrmap_lookup(struct pci_dev *dev) { struct pcibios_fwaddrmap *map; - WARN_ON_SMP(!spin_is_locked(&pcibios_fwaddrmap_lock)); + lockdep_assert_held(&pcibios_fwaddrmap_lock); list_for_each_entry(map, &pcibios_fwaddrmappings, list) if (map->dev == dev) diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index f8ec3d4ba4a8..8eb3c4c9ff67 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c @@ -382,7 +382,7 @@ static int pcrypt_cpumask_change_notify(struct notifier_block *self, cpumask_copy(new_mask->mask, cpumask->cbcpu); rcu_assign_pointer(pcrypt->cb_cpumask, new_mask); - synchronize_rcu_bh(); + synchronize_rcu(); free_cpumask_var(old_mask->mask); kfree(old_mask); diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 677618e6f1f7..dc8603d34320 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -2187,7 +2187,7 @@ static void shutdown_smi(void *send_info) * handlers might have been running before we freed the * interrupt. 
*/ - synchronize_sched(); + synchronize_rcu(); /* * Timeouts are stopped, now make sure the interrupts are off diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 6d53f7d9fc7a..ffa9adeaba31 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -346,7 +346,7 @@ static inline void gov_clear_update_util(struct cpufreq_policy *policy) for_each_cpu(i, policy->cpus) cpufreq_remove_update_util_hook(i); - synchronize_sched(); + synchronize_rcu(); } static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *policy, diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 9578312e43f2..ed124d72db76 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1930,7 +1930,7 @@ static void intel_pstate_clear_update_util_hook(unsigned int cpu) cpufreq_remove_update_util_hook(cpu); cpu_data->update_util_set = false; - synchronize_sched(); + synchronize_rcu(); } static int intel_pstate_get_max_freq(struct cpudata *cpu) diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c index ffd68a7bc9e1..69d752f0b621 100644 --- a/drivers/net/ethernet/realtek/8139too.c +++ b/drivers/net/ethernet/realtek/8139too.c @@ -1661,7 +1661,7 @@ static void rtl8139_tx_timeout_task (struct work_struct *work) napi_disable(&tp->napi); netif_stop_queue(dev); - synchronize_sched(); + synchronize_rcu(); netdev_dbg(dev, "Transmit timeout, status %02x %04x %04x media %02x\n", RTL_R8(ChipCmd), RTL_R16(IntrStatus), diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 1fd01688d37b..4f1d89f0dc24 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -5866,7 +5866,7 @@ static void rtl_reset_work(struct rtl8169_private *tp) napi_disable(&tp->napi); netif_stop_queue(dev); - synchronize_sched(); + synchronize_rcu(); rtl8169_hw_reset(tp); @@ -6609,7 +6609,7 @@ static void rtl8169_down(struct net_device *dev) rtl8169_rx_missed(dev); /* Give a racing hard_start_xmit a few cycles to complete. 
*/ - synchronize_sched(); + synchronize_rcu(); rtl8169_tx_clear(tp); diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 98fe7e762e17..3643015a55cf 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -3167,7 +3167,7 @@ struct hlist_head *efx_rps_hash_bucket(struct efx_nic *efx, { u32 hash = efx_filter_spec_hash(spec); - WARN_ON(!spin_is_locked(&efx->rps_hash_lock)); + lockdep_assert_held(&efx->rps_hash_lock); if (!efx->rps_hash_table) return NULL; return &efx->rps_hash_table[hash % EFX_ARFS_HASH_TABLE_SIZE]; diff --git a/drivers/net/ethernet/sis/sis190.c b/drivers/net/ethernet/sis/sis190.c index c2c50522b96d..808cf9816673 100644 --- a/drivers/net/ethernet/sis/sis190.c +++ b/drivers/net/ethernet/sis/sis190.c @@ -1142,7 +1142,7 @@ static void sis190_down(struct net_device *dev) if (!poll_locked) poll_locked++; - synchronize_sched(); + synchronize_rcu(); } while (SIS_R32(IntrMask)); diff --git a/drivers/net/ethernet/smsc/smsc911x.h b/drivers/net/ethernet/smsc/smsc911x.h index 8d75508acd2b..51b2fc1a395f 100644 --- a/drivers/net/ethernet/smsc/smsc911x.h +++ b/drivers/net/ethernet/smsc/smsc911x.h @@ -67,7 +67,7 @@ #ifdef CONFIG_DEBUG_SPINLOCK #define SMSC_ASSERT_MAC_LOCK(pdata) \ - WARN_ON_SMP(!spin_is_locked(&pdata->mac_lock)) + lockdep_assert_held(&pdata->mac_lock) #else #define SMSC_ASSERT_MAC_LOCK(pdata) do {} while (0) #endif /* CONFIG_DEBUG_SPINLOCK */ diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index ab11b2bee273..564ead864028 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1359,7 +1359,7 @@ static int vhost_net_release(struct inode *inode, struct file *f) if (rx_sock) sockfd_put(rx_sock); /* Make sure no callbacks are outstanding */ - synchronize_rcu_bh(); + synchronize_rcu(); /* We do an extra flush before freeing memory, * since jobs can re-queue themselves. */ vhost_net_flush(n); diff --git a/fs/file.c b/fs/file.c index 7ffd6e9d103d..50304c7525ea 100644 --- a/fs/file.c +++ b/fs/file.c @@ -158,7 +158,7 @@ static int expand_fdtable(struct files_struct *files, unsigned int nr) * or have finished their rcu_read_lock_sched() section. */ if (atomic_read(&files->count) > 1) - synchronize_sched(); + synchronize_rcu(); spin_lock(&files->file_lock); if (!new_fdt) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 356d2b8568c1..681881dc8a9d 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -926,7 +926,7 @@ static inline struct userfaultfd_wait_queue *find_userfault_in( wait_queue_entry_t *wq; struct userfaultfd_wait_queue *uwq; - VM_BUG_ON(!spin_is_locked(&wqh->lock)); + lockdep_assert_held(&wqh->lock); uwq = NULL; if (!waitqueue_active(wqh)) diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 79b99d653e03..71b75643c432 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -41,7 +41,7 @@ static inline void percpu_down_read_preempt_disable(struct percpu_rw_semaphore * * cannot both change sem->state from readers_fast and start checking * counters while we are here. So if we see !sem->state, we know that * the writer won't be checking until we're past the preempt_enable() - * and that one the synchronize_sched() is done, the writer will see + * and that once the synchronize_rcu() is done, the writer will see * anything we did within this RCU-sched read-size critical section. 
*/ __this_cpu_inc(*sem->read_count); diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h index 8a16c3eb3dd0..c0578ba23c1a 100644 --- a/include/linux/rcupdate_wait.h +++ b/include/linux/rcupdate_wait.h @@ -31,21 +31,4 @@ do { \ #define wait_rcu_gp(...) _wait_rcu_gp(false, __VA_ARGS__) -/** - * synchronize_rcu_mult - Wait concurrently for multiple grace periods - * @...: List of call_rcu() functions for different grace periods to wait on - * - * This macro waits concurrently for multiple types of RCU grace periods. - * For example, synchronize_rcu_mult(call_rcu, call_rcu_tasks) would wait - * on concurrent RCU and RCU-tasks grace periods. Waiting on a give SRCU - * domain requires you to write a wrapper function for that SRCU domain's - * call_srcu() function, supplying the corresponding srcu_struct. - * - * If Tiny RCU, tell _wait_rcu_gp() does not bother waiting for RCU, - * given that anywhere synchronize_rcu_mult() can be called is automatically - * a grace period. - */ -#define synchronize_rcu_mult(...) \ - _wait_rcu_gp(IS_ENABLED(CONFIG_TINY_RCU), __VA_ARGS__) - #endif /* _LINUX_SCHED_RCUPDATE_WAIT_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index a51c13c2b1a0..e4c7b6241088 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -572,8 +572,10 @@ union rcu_special { struct { u8 blocked; u8 need_qs; + u8 exp_hint; /* Hint for performance. */ + u8 pad; /* No garbage from compiler! */ } b; /* Bits. */ - u16 s; /* Set of bits. */ + u32 s; /* Set of bits. */ }; enum perf_event_task_context { diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 67135d4a8a30..c614375cd264 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -38,20 +38,20 @@ struct srcu_struct; #ifdef CONFIG_DEBUG_LOCK_ALLOC -int __init_srcu_struct(struct srcu_struct *sp, const char *name, +int __init_srcu_struct(struct srcu_struct *ssp, const char *name, struct lock_class_key *key); -#define init_srcu_struct(sp) \ +#define init_srcu_struct(ssp) \ ({ \ static struct lock_class_key __srcu_key; \ \ - __init_srcu_struct((sp), #sp, &__srcu_key); \ + __init_srcu_struct((ssp), #ssp, &__srcu_key); \ }) #define __SRCU_DEP_MAP_INIT(srcu_name) .dep_map = { .name = #srcu_name }, #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ -int init_srcu_struct(struct srcu_struct *sp); +int init_srcu_struct(struct srcu_struct *ssp); #define __SRCU_DEP_MAP_INIT(srcu_name) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ @@ -67,28 +67,28 @@ int init_srcu_struct(struct srcu_struct *sp); struct srcu_struct { }; #endif -void call_srcu(struct srcu_struct *sp, struct rcu_head *head, +void call_srcu(struct srcu_struct *ssp, struct rcu_head *head, void (*func)(struct rcu_head *head)); -void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced); -int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp); -void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp); -void synchronize_srcu(struct srcu_struct *sp); +void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced); +int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp); +void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp); +void synchronize_srcu(struct srcu_struct *ssp); /** * cleanup_srcu_struct - deconstruct a sleep-RCU structure - * @sp: structure to clean up. + * @ssp: structure to clean up. * * Must invoke this after you are finished using a given srcu_struct that * was initialized via init_srcu_struct(), else you leak memory. 
*/ -static inline void cleanup_srcu_struct(struct srcu_struct *sp) +static inline void cleanup_srcu_struct(struct srcu_struct *ssp) { - _cleanup_srcu_struct(sp, false); + _cleanup_srcu_struct(ssp, false); } /** * cleanup_srcu_struct_quiesced - deconstruct a quiesced sleep-RCU structure - * @sp: structure to clean up. + * @ssp: structure to clean up. * * Must invoke this after you are finished using a given srcu_struct that * was initialized via init_srcu_struct(), else you leak memory. Also, @@ -103,16 +103,16 @@ static inline void cleanup_srcu_struct(struct srcu_struct *sp) * (with high probability, anyway), and will also cause the srcu_struct * to be leaked. */ -static inline void cleanup_srcu_struct_quiesced(struct srcu_struct *sp) +static inline void cleanup_srcu_struct_quiesced(struct srcu_struct *ssp) { - _cleanup_srcu_struct(sp, true); + _cleanup_srcu_struct(ssp, true); } #ifdef CONFIG_DEBUG_LOCK_ALLOC /** * srcu_read_lock_held - might we be in SRCU read-side critical section? - * @sp: The srcu_struct structure to check + * @ssp: The srcu_struct structure to check * * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an SRCU * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, @@ -126,16 +126,16 @@ static inline void cleanup_srcu_struct_quiesced(struct srcu_struct *sp) * relies on normal RCU, it can be called from the CPU which * is in the idle loop from an RCU point of view or offline. */ -static inline int srcu_read_lock_held(const struct srcu_struct *sp) +static inline int srcu_read_lock_held(const struct srcu_struct *ssp) { if (!debug_lockdep_rcu_enabled()) return 1; - return lock_is_held(&sp->dep_map); + return lock_is_held(&ssp->dep_map); } #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ -static inline int srcu_read_lock_held(const struct srcu_struct *sp) +static inline int srcu_read_lock_held(const struct srcu_struct *ssp) { return 1; } @@ -145,7 +145,7 @@ static inline int srcu_read_lock_held(const struct srcu_struct *sp) /** * srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing * @p: the pointer to fetch and protect for later dereferencing - * @sp: pointer to the srcu_struct, which is used to check that we + * @ssp: pointer to the srcu_struct, which is used to check that we * really are in an SRCU read-side critical section. * @c: condition to check for update-side use * @@ -154,29 +154,32 @@ static inline int srcu_read_lock_held(const struct srcu_struct *sp) * to 1. The @c argument will normally be a logical expression containing * lockdep_is_held() calls. */ -#define srcu_dereference_check(p, sp, c) \ - __rcu_dereference_check((p), (c) || srcu_read_lock_held(sp), __rcu) +#define srcu_dereference_check(p, ssp, c) \ + __rcu_dereference_check((p), (c) || srcu_read_lock_held(ssp), __rcu) /** * srcu_dereference - fetch SRCU-protected pointer for later dereferencing * @p: the pointer to fetch and protect for later dereferencing - * @sp: pointer to the srcu_struct, which is used to check that we + * @ssp: pointer to the srcu_struct, which is used to check that we * really are in an SRCU read-side critical section. * * Makes rcu_dereference_check() do the dirty work. If PROVE_RCU * is enabled, invoking this outside of an RCU read-side critical * section will result in an RCU-lockdep splat. 
*/ -#define srcu_dereference(p, sp) srcu_dereference_check((p), (sp), 0) +#define srcu_dereference(p, ssp) srcu_dereference_check((p), (ssp), 0) /** * srcu_dereference_notrace - no tracing and no lockdep calls from here + * @p: the pointer to fetch and protect for later dereferencing + * @ssp: pointer to the srcu_struct, which is used to check that we + * really are in an SRCU read-side critical section. */ -#define srcu_dereference_notrace(p, sp) srcu_dereference_check((p), (sp), 1) +#define srcu_dereference_notrace(p, ssp) srcu_dereference_check((p), (ssp), 1) /** * srcu_read_lock - register a new reader for an SRCU-protected structure. - * @sp: srcu_struct in which to register the new reader. + * @ssp: srcu_struct in which to register the new reader. * * Enter an SRCU read-side critical section. Note that SRCU read-side * critical sections may be nested. However, it is illegal to @@ -191,44 +194,44 @@ static inline int srcu_read_lock_held(const struct srcu_struct *sp) * srcu_read_unlock() in an irq handler if the matching srcu_read_lock() * was invoked in process context. */ -static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) +static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp) { int retval; - retval = __srcu_read_lock(sp); - rcu_lock_acquire(&(sp)->dep_map); + retval = __srcu_read_lock(ssp); + rcu_lock_acquire(&(ssp)->dep_map); return retval; } /* Used by tracing, cannot be traced and cannot invoke lockdep. */ static inline notrace int -srcu_read_lock_notrace(struct srcu_struct *sp) __acquires(sp) +srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp) { int retval; - retval = __srcu_read_lock(sp); + retval = __srcu_read_lock(ssp); return retval; } /** * srcu_read_unlock - unregister a old reader from an SRCU-protected structure. - * @sp: srcu_struct in which to unregister the old reader. + * @ssp: srcu_struct in which to unregister the old reader. * @idx: return value from corresponding srcu_read_lock(). * * Exit an SRCU read-side critical section. */ -static inline void srcu_read_unlock(struct srcu_struct *sp, int idx) - __releases(sp) +static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx) + __releases(ssp) { - rcu_lock_release(&(sp)->dep_map); - __srcu_read_unlock(sp, idx); + rcu_lock_release(&(ssp)->dep_map); + __srcu_read_unlock(ssp, idx); } /* Used by tracing, cannot be traced and cannot call lockdep. */ static inline notrace void -srcu_read_unlock_notrace(struct srcu_struct *sp, int idx) __releases(sp) +srcu_read_unlock_notrace(struct srcu_struct *ssp, int idx) __releases(ssp) { - __srcu_read_unlock(sp, idx); + __srcu_read_unlock(ssp, idx); } /** diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index f41d2fb09f87..b19216aaaef2 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -60,7 +60,7 @@ void srcu_drive_gp(struct work_struct *wp); #define DEFINE_STATIC_SRCU(name) \ static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name) -void synchronize_srcu(struct srcu_struct *sp); +void synchronize_srcu(struct srcu_struct *ssp); /* * Counts the new reader in the appropriate per-CPU element of the @@ -68,36 +68,36 @@ void synchronize_srcu(struct srcu_struct *sp); * __srcu_read_unlock() must be in the same handler instance. Returns an * index that must be passed to the matching srcu_read_unlock(). 
*/ -static inline int __srcu_read_lock(struct srcu_struct *sp) +static inline int __srcu_read_lock(struct srcu_struct *ssp) { int idx; - idx = READ_ONCE(sp->srcu_idx); - WRITE_ONCE(sp->srcu_lock_nesting[idx], sp->srcu_lock_nesting[idx] + 1); + idx = READ_ONCE(ssp->srcu_idx); + WRITE_ONCE(ssp->srcu_lock_nesting[idx], ssp->srcu_lock_nesting[idx] + 1); return idx; } -static inline void synchronize_srcu_expedited(struct srcu_struct *sp) +static inline void synchronize_srcu_expedited(struct srcu_struct *ssp) { - synchronize_srcu(sp); + synchronize_srcu(ssp); } -static inline void srcu_barrier(struct srcu_struct *sp) +static inline void srcu_barrier(struct srcu_struct *ssp) { - synchronize_srcu(sp); + synchronize_srcu(ssp); } /* Defined here to avoid size increase for non-torture kernels. */ -static inline void srcu_torture_stats_print(struct srcu_struct *sp, +static inline void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf) { int idx; - idx = READ_ONCE(sp->srcu_idx) & 0x1; + idx = READ_ONCE(ssp->srcu_idx) & 0x1; pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%hd,%hd)\n", tt, tf, idx, - READ_ONCE(sp->srcu_lock_nesting[!idx]), - READ_ONCE(sp->srcu_lock_nesting[idx])); + READ_ONCE(ssp->srcu_lock_nesting[!idx]), + READ_ONCE(ssp->srcu_lock_nesting[idx])); } #endif diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 0ae91b3a7406..6f292bd3e7db 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -51,7 +51,7 @@ struct srcu_data { unsigned long grpmask; /* Mask for leaf srcu_node */ /* ->srcu_data_have_cbs[]. */ int cpu; - struct srcu_struct *sp; + struct srcu_struct *ssp; }; /* @@ -138,8 +138,8 @@ struct srcu_struct { #define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */) #define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static) -void synchronize_srcu_expedited(struct srcu_struct *sp); -void srcu_barrier(struct srcu_struct *sp); -void srcu_torture_stats_print(struct srcu_struct *sp, char *tt, char *tf); +void synchronize_srcu_expedited(struct srcu_struct *ssp); +void srcu_barrier(struct srcu_struct *ssp); +void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf); #endif diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 538ba1a58f5b..432080b59c26 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -82,7 +82,7 @@ int unregister_tracepoint_module_notifier(struct notifier_block *nb) static inline void tracepoint_synchronize_unregister(void) { synchronize_srcu(&tracepoint_srcu); - synchronize_sched(); + synchronize_rcu(); } #else static inline void tracepoint_synchronize_unregister(void) diff --git a/include/linux/types.h b/include/linux/types.h index 9834e90aa010..c2615d6a019e 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -212,8 +212,8 @@ struct ustat { * weird ABI and we need to ask it explicitly. * * The alignment is required to guarantee that bit 0 of @next will be - * clear under normal conditions -- as long as we use call_rcu(), - * call_rcu_bh(), call_rcu_sched(), or call_srcu() to queue callback. + * clear under normal conditions -- as long as we use call_rcu() or + * call_srcu() to queue the callback. 
* * This guarantee is important for few reasons: * - future call_rcu_lazy() will make use of lower bits in the pointer; diff --git a/init/main.c b/init/main.c index ee147103ba1b..a45486330243 100644 --- a/init/main.c +++ b/init/main.c @@ -1046,12 +1046,12 @@ static void mark_readonly(void) { if (rodata_enabled) { /* - * load_module() results in W+X mappings, which are cleaned up - * with call_rcu_sched(). Let's make sure that queued work is + * load_module() results in W+X mappings, which are cleaned + * up with call_rcu(). Let's make sure that queued work is * flushed so that we don't hit false positives looking for * insecure pages which are W+X. */ - rcu_barrier_sched(); + rcu_barrier(); mark_rodata_ro(); rodata_test(); } else diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 6aaf5dd5383b..7a8429f8e280 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5343,7 +5343,7 @@ int __init cgroup_init(void) cgroup_rstat_boot(); /* - * The latency of the synchronize_sched() is too high for cgroups, + * The latency of the synchronize_rcu() is too high for cgroups, * avoid it at the cost of forcing all readers into the slow path. */ rcu_sync_enter_start(&cgroup_threadgroup_rwsem.rss); diff --git a/kernel/events/core.c b/kernel/events/core.c index 84530ab358c3..c4b90cf7734a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9918,7 +9918,7 @@ static void account_event(struct perf_event *event) * call the perf scheduling hooks before proceeding to * install events that need them. */ - synchronize_sched(); + synchronize_rcu(); } /* * Now that we have waited for the sync_sched(), allow further diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 90e98e233647..08e31d863191 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -229,7 +229,7 @@ static int collect_garbage_slots(struct kprobe_insn_cache *c) struct kprobe_insn_page *kip, *next; /* Ensure no-one is interrupted on the garbages */ - synchronize_sched(); + synchronize_rcu(); list_for_each_entry_safe(kip, next, &c->pages, list) { int i; @@ -1382,7 +1382,7 @@ out: if (ret) { ap->flags |= KPROBE_FLAG_DISABLED; list_del_rcu(&p->list); - synchronize_sched(); + synchronize_rcu(); } } } @@ -1597,7 +1597,7 @@ int register_kprobe(struct kprobe *p) ret = arm_kprobe(p); if (ret) { hlist_del_rcu(&p->hlist); - synchronize_sched(); + synchronize_rcu(); goto out; } } @@ -1776,7 +1776,7 @@ void unregister_kprobes(struct kprobe **kps, int num) kps[i]->addr = NULL; mutex_unlock(&kprobe_mutex); - synchronize_sched(); + synchronize_rcu(); for (i = 0; i < num; i++) if (kps[i]->addr) __unregister_kprobe_bottom(kps[i]); @@ -1966,7 +1966,7 @@ void unregister_kretprobes(struct kretprobe **rps, int num) rps[i]->kp.addr = NULL; mutex_unlock(&kprobe_mutex); - synchronize_sched(); + synchronize_rcu(); for (i = 0; i < num; i++) { if (rps[i]->kp.addr) { __unregister_kprobe_bottom(&rps[i]->kp); diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c index 82d584225dc6..7702cb4064fc 100644 --- a/kernel/livepatch/patch.c +++ b/kernel/livepatch/patch.c @@ -61,7 +61,7 @@ static void notrace klp_ftrace_handler(unsigned long ip, ops = container_of(fops, struct klp_ops, fops); /* - * A variant of synchronize_sched() is used to allow patching functions + * A variant of synchronize_rcu() is used to allow patching functions * where RCU is not watching, see klp_synchronize_transition(). 
*/ preempt_disable_notrace(); @@ -72,7 +72,7 @@ static void notrace klp_ftrace_handler(unsigned long ip, /* * func should never be NULL because preemption should be disabled here * and unregister_ftrace_function() does the equivalent of a - * synchronize_sched() before the func_stack removal. + * synchronize_rcu() before the func_stack removal. */ if (WARN_ON_ONCE(!func)) goto unlock; diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c index 5bc349805e03..304d5eb8a98c 100644 --- a/kernel/livepatch/transition.c +++ b/kernel/livepatch/transition.c @@ -52,7 +52,7 @@ static DECLARE_DELAYED_WORK(klp_transition_work, klp_transition_work_fn); /* * This function is just a stub to implement a hard force - * of synchronize_sched(). This requires synchronizing + * of synchronize_rcu(). This requires synchronizing * tasks even in userspace and idle. */ static void klp_sync(struct work_struct *work) @@ -175,7 +175,7 @@ void klp_cancel_transition(void) void klp_update_patch_state(struct task_struct *task) { /* - * A variant of synchronize_sched() is used to allow patching functions + * A variant of synchronize_rcu() is used to allow patching functions * where RCU is not watching, see klp_synchronize_transition(). */ preempt_disable_notrace(); diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 1efada2dd9dd..ef27f98714c0 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -4195,7 +4195,7 @@ void lockdep_free_key_range(void *start, unsigned long size) * * sync_sched() is sufficient because the read-side is IRQ disable. */ - synchronize_sched(); + synchronize_rcu(); /* * XXX at this point we could return the resources to the pool; diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c index 9aa713629387..771d4ca96dda 100644 --- a/kernel/locking/mutex-debug.c +++ b/kernel/locking/mutex-debug.c @@ -36,7 +36,7 @@ void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter) void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter) { - SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock)); + lockdep_assert_held(&lock->wait_lock); DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list)); DEBUG_LOCKS_WARN_ON(waiter->magic != waiter); DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list)); @@ -51,7 +51,7 @@ void debug_mutex_free_waiter(struct mutex_waiter *waiter) void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, struct task_struct *task) { - SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock)); + lockdep_assert_held(&lock->wait_lock); /* Mark the current thread as blocked on the lock: */ task->blocked_on = waiter; diff --git a/kernel/module.c b/kernel/module.c index 49a405891587..99b46c32d579 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2159,7 +2159,7 @@ static void free_module(struct module *mod) /* Remove this module from bug list, this uses list_del_rcu */ module_bug_cleanup(mod); /* Wait for RCU-sched synchronizing before releasing mod->list and buglist. */ - synchronize_sched(); + synchronize_rcu(); mutex_unlock(&module_mutex); /* This may be empty, but that's OK */ @@ -3507,15 +3507,15 @@ static noinline int do_init_module(struct module *mod) /* * We want to free module_init, but be aware that kallsyms may be * walking this with preempt disabled. In all the failure paths, we - * call synchronize_sched(), but we don't want to slow down the success + * call synchronize_rcu(), but we don't want to slow down the success * path, so use actual RCU here. 
* Note that module_alloc() on most architectures creates W+X page * mappings which won't be cleaned up until do_free_init() runs. Any * code such as mark_rodata_ro() which depends on those mappings to * be cleaned up needs to sync with the queued work - ie - * rcu_barrier_sched() + * rcu_barrier() */ - call_rcu_sched(&freeinit->rcu, do_free_init); + call_rcu(&freeinit->rcu, do_free_init); mutex_unlock(&module_mutex); wake_up_all(&module_wq); @@ -3526,7 +3526,7 @@ fail_free_freeinit: fail: /* Try to protect us from buggy refcounters. */ mod->state = MODULE_STATE_GOING; - synchronize_sched(); + synchronize_rcu(); module_put(mod); blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GOING, mod); @@ -3819,7 +3819,7 @@ static int load_module(struct load_info *info, const char __user *uargs, ddebug_cleanup: ftrace_release_mod(mod); dynamic_debug_remove(mod, info->debug); - synchronize_sched(); + synchronize_rcu(); kfree(mod->args); free_arch_cleanup: module_arch_cleanup(mod); @@ -3834,7 +3834,7 @@ static int load_module(struct load_info *info, const char __user *uargs, mod_tree_remove(mod); wake_up_all(&module_wq); /* Wait for RCU-sched synchronizing before releasing mod->list. */ - synchronize_sched(); + synchronize_rcu(); mutex_unlock(&module_mutex); free_module: /* Free lock-classes; relies on the preceding sync_rcu() */ diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index b46e6683f8c9..32dfd6522548 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c @@ -37,30 +37,30 @@ int rcu_scheduler_active __read_mostly; static LIST_HEAD(srcu_boot_list); static bool srcu_init_done; -static int init_srcu_struct_fields(struct srcu_struct *sp) +static int init_srcu_struct_fields(struct srcu_struct *ssp) { - sp->srcu_lock_nesting[0] = 0; - sp->srcu_lock_nesting[1] = 0; - init_swait_queue_head(&sp->srcu_wq); - sp->srcu_cb_head = NULL; - sp->srcu_cb_tail = &sp->srcu_cb_head; - sp->srcu_gp_running = false; - sp->srcu_gp_waiting = false; - sp->srcu_idx = 0; - INIT_WORK(&sp->srcu_work, srcu_drive_gp); - INIT_LIST_HEAD(&sp->srcu_work.entry); + ssp->srcu_lock_nesting[0] = 0; + ssp->srcu_lock_nesting[1] = 0; + init_swait_queue_head(&ssp->srcu_wq); + ssp->srcu_cb_head = NULL; + ssp->srcu_cb_tail = &ssp->srcu_cb_head; + ssp->srcu_gp_running = false; + ssp->srcu_gp_waiting = false; + ssp->srcu_idx = 0; + INIT_WORK(&ssp->srcu_work, srcu_drive_gp); + INIT_LIST_HEAD(&ssp->srcu_work.entry); return 0; } #ifdef CONFIG_DEBUG_LOCK_ALLOC -int __init_srcu_struct(struct srcu_struct *sp, const char *name, +int __init_srcu_struct(struct srcu_struct *ssp, const char *name, struct lock_class_key *key) { /* Don't re-initialize a lock while it is held. */ - debug_check_no_locks_freed((void *)sp, sizeof(*sp)); - lockdep_init_map(&sp->dep_map, name, key, 0); - return init_srcu_struct_fields(sp); + debug_check_no_locks_freed((void *)ssp, sizeof(*ssp)); + lockdep_init_map(&ssp->dep_map, name, key, 0); + return init_srcu_struct_fields(ssp); } EXPORT_SYMBOL_GPL(__init_srcu_struct); @@ -68,15 +68,15 @@ EXPORT_SYMBOL_GPL(__init_srcu_struct); /* * init_srcu_struct - initialize a sleep-RCU structure - * @sp: structure to initialize. + * @ssp: structure to initialize. * * Must invoke this on a given srcu_struct before passing that srcu_struct * to any other function. Each srcu_struct represents a separate domain * of SRCU protection. 
*/ -int init_srcu_struct(struct srcu_struct *sp) +int init_srcu_struct(struct srcu_struct *ssp) { - return init_srcu_struct_fields(sp); + return init_srcu_struct_fields(ssp); } EXPORT_SYMBOL_GPL(init_srcu_struct); @@ -84,22 +84,22 @@ EXPORT_SYMBOL_GPL(init_srcu_struct); /* * cleanup_srcu_struct - deconstruct a sleep-RCU structure - * @sp: structure to clean up. + * @ssp: structure to clean up. * * Must invoke this after you are finished using a given srcu_struct that * was initialized via init_srcu_struct(), else you leak memory. */ -void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced) +void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced) { - WARN_ON(sp->srcu_lock_nesting[0] || sp->srcu_lock_nesting[1]); + WARN_ON(ssp->srcu_lock_nesting[0] || ssp->srcu_lock_nesting[1]); if (quiesced) - WARN_ON(work_pending(&sp->srcu_work)); + WARN_ON(work_pending(&ssp->srcu_work)); else - flush_work(&sp->srcu_work); - WARN_ON(sp->srcu_gp_running); - WARN_ON(sp->srcu_gp_waiting); - WARN_ON(sp->srcu_cb_head); - WARN_ON(&sp->srcu_cb_head != sp->srcu_cb_tail); + flush_work(&ssp->srcu_work); + WARN_ON(ssp->srcu_gp_running); + WARN_ON(ssp->srcu_gp_waiting); + WARN_ON(ssp->srcu_cb_head); + WARN_ON(&ssp->srcu_cb_head != ssp->srcu_cb_tail); } EXPORT_SYMBOL_GPL(_cleanup_srcu_struct); @@ -107,13 +107,13 @@ EXPORT_SYMBOL_GPL(_cleanup_srcu_struct); * Removes the count for the old reader from the appropriate element of * the srcu_struct. */ -void __srcu_read_unlock(struct srcu_struct *sp, int idx) +void __srcu_read_unlock(struct srcu_struct *ssp, int idx) { - int newval = sp->srcu_lock_nesting[idx] - 1; + int newval = ssp->srcu_lock_nesting[idx] - 1; - WRITE_ONCE(sp->srcu_lock_nesting[idx], newval); - if (!newval && READ_ONCE(sp->srcu_gp_waiting)) - swake_up_one(&sp->srcu_wq); + WRITE_ONCE(ssp->srcu_lock_nesting[idx], newval); + if (!newval && READ_ONCE(ssp->srcu_gp_waiting)) + swake_up_one(&ssp->srcu_wq); } EXPORT_SYMBOL_GPL(__srcu_read_unlock); @@ -127,24 +127,24 @@ void srcu_drive_gp(struct work_struct *wp) int idx; struct rcu_head *lh; struct rcu_head *rhp; - struct srcu_struct *sp; + struct srcu_struct *ssp; - sp = container_of(wp, struct srcu_struct, srcu_work); - if (sp->srcu_gp_running || !READ_ONCE(sp->srcu_cb_head)) + ssp = container_of(wp, struct srcu_struct, srcu_work); + if (ssp->srcu_gp_running || !READ_ONCE(ssp->srcu_cb_head)) return; /* Already running or nothing to do. */ /* Remove recently arrived callbacks and wait for readers. */ - WRITE_ONCE(sp->srcu_gp_running, true); + WRITE_ONCE(ssp->srcu_gp_running, true); local_irq_disable(); - lh = sp->srcu_cb_head; - sp->srcu_cb_head = NULL; - sp->srcu_cb_tail = &sp->srcu_cb_head; + lh = ssp->srcu_cb_head; + ssp->srcu_cb_head = NULL; + ssp->srcu_cb_tail = &ssp->srcu_cb_head; local_irq_enable(); - idx = sp->srcu_idx; - WRITE_ONCE(sp->srcu_idx, !sp->srcu_idx); - WRITE_ONCE(sp->srcu_gp_waiting, true); /* srcu_read_unlock() wakes! */ - swait_event_exclusive(sp->srcu_wq, !READ_ONCE(sp->srcu_lock_nesting[idx])); - WRITE_ONCE(sp->srcu_gp_waiting, false); /* srcu_read_unlock() cheap. */ + idx = ssp->srcu_idx; + WRITE_ONCE(ssp->srcu_idx, !ssp->srcu_idx); + WRITE_ONCE(ssp->srcu_gp_waiting, true); /* srcu_read_unlock() wakes! */ + swait_event_exclusive(ssp->srcu_wq, !READ_ONCE(ssp->srcu_lock_nesting[idx])); + WRITE_ONCE(ssp->srcu_gp_waiting, false); /* srcu_read_unlock() cheap. */ /* Invoke the callbacks we removed above. 
*/ while (lh) { @@ -161,9 +161,9 @@ void srcu_drive_gp(struct work_struct *wp) * at interrupt level, but the ->srcu_gp_running checks will * straighten that out. */ - WRITE_ONCE(sp->srcu_gp_running, false); - if (READ_ONCE(sp->srcu_cb_head)) - schedule_work(&sp->srcu_work); + WRITE_ONCE(ssp->srcu_gp_running, false); + if (READ_ONCE(ssp->srcu_cb_head)) + schedule_work(&ssp->srcu_work); } EXPORT_SYMBOL_GPL(srcu_drive_gp); @@ -171,7 +171,7 @@ EXPORT_SYMBOL_GPL(srcu_drive_gp); * Enqueue an SRCU callback on the specified srcu_struct structure, * initiating grace-period processing if it is not already running. */ -void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, +void call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp, rcu_callback_t func) { unsigned long flags; @@ -179,14 +179,14 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, rhp->func = func; rhp->next = NULL; local_irq_save(flags); - *sp->srcu_cb_tail = rhp; - sp->srcu_cb_tail = &rhp->next; + *ssp->srcu_cb_tail = rhp; + ssp->srcu_cb_tail = &rhp->next; local_irq_restore(flags); - if (!READ_ONCE(sp->srcu_gp_running)) { + if (!READ_ONCE(ssp->srcu_gp_running)) { if (likely(srcu_init_done)) - schedule_work(&sp->srcu_work); - else if (list_empty(&sp->srcu_work.entry)) - list_add(&sp->srcu_work.entry, &srcu_boot_list); + schedule_work(&ssp->srcu_work); + else if (list_empty(&ssp->srcu_work.entry)) + list_add(&ssp->srcu_work.entry, &srcu_boot_list); } } EXPORT_SYMBOL_GPL(call_srcu); @@ -194,13 +194,13 @@ EXPORT_SYMBOL_GPL(call_srcu); /* * synchronize_srcu - wait for prior SRCU read-side critical-section completion */ -void synchronize_srcu(struct srcu_struct *sp) +void synchronize_srcu(struct srcu_struct *ssp) { struct rcu_synchronize rs; init_rcu_head_on_stack(&rs.head); init_completion(&rs.completion); - call_srcu(sp, &rs.head, wakeme_after_rcu); + call_srcu(ssp, &rs.head, wakeme_after_rcu); wait_for_completion(&rs.completion); destroy_rcu_head_on_stack(&rs.head); } @@ -219,13 +219,13 @@ void __init rcu_scheduler_starting(void) */ void __init srcu_init(void) { - struct srcu_struct *sp; + struct srcu_struct *ssp; srcu_init_done = true; while (!list_empty(&srcu_boot_list)) { - sp = list_first_entry(&srcu_boot_list, + ssp = list_first_entry(&srcu_boot_list, struct srcu_struct, srcu_work.entry); - list_del_init(&sp->srcu_work.entry); - schedule_work(&sp->srcu_work); + list_del_init(&ssp->srcu_work.entry); + schedule_work(&ssp->srcu_work); } } diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index a8846ed7f352..3600d88d8956 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -56,7 +56,7 @@ static LIST_HEAD(srcu_boot_list); static bool __read_mostly srcu_init_done; static void srcu_invoke_callbacks(struct work_struct *work); -static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay); +static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay); static void process_srcu(struct work_struct *work); /* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */ @@ -92,7 +92,7 @@ do { \ * srcu_read_unlock() running against them. So if the is_static parameter * is set, don't initialize ->srcu_lock_count[] and ->srcu_unlock_count[]. 
*/ -static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) +static void init_srcu_struct_nodes(struct srcu_struct *ssp, bool is_static) { int cpu; int i; @@ -103,13 +103,13 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) struct srcu_node *snp_first; /* Work out the overall tree geometry. */ - sp->level[0] = &sp->node[0]; + ssp->level[0] = &ssp->node[0]; for (i = 1; i < rcu_num_lvls; i++) - sp->level[i] = sp->level[i - 1] + num_rcu_lvl[i - 1]; + ssp->level[i] = ssp->level[i - 1] + num_rcu_lvl[i - 1]; rcu_init_levelspread(levelspread, num_rcu_lvl); /* Each pass through this loop initializes one srcu_node structure. */ - srcu_for_each_node_breadth_first(sp, snp) { + srcu_for_each_node_breadth_first(ssp, snp) { spin_lock_init(&ACCESS_PRIVATE(snp, lock)); WARN_ON_ONCE(ARRAY_SIZE(snp->srcu_have_cbs) != ARRAY_SIZE(snp->srcu_data_have_cbs)); @@ -120,17 +120,17 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) snp->srcu_gp_seq_needed_exp = 0; snp->grplo = -1; snp->grphi = -1; - if (snp == &sp->node[0]) { + if (snp == &ssp->node[0]) { /* Root node, special case. */ snp->srcu_parent = NULL; continue; } /* Non-root node. */ - if (snp == sp->level[level + 1]) + if (snp == ssp->level[level + 1]) level++; - snp->srcu_parent = sp->level[level - 1] + - (snp - sp->level[level]) / + snp->srcu_parent = ssp->level[level - 1] + + (snp - ssp->level[level]) / levelspread[level - 1]; } @@ -141,14 +141,14 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) WARN_ON_ONCE(ARRAY_SIZE(sdp->srcu_lock_count) != ARRAY_SIZE(sdp->srcu_unlock_count)); level = rcu_num_lvls - 1; - snp_first = sp->level[level]; + snp_first = ssp->level[level]; for_each_possible_cpu(cpu) { - sdp = per_cpu_ptr(sp->sda, cpu); + sdp = per_cpu_ptr(ssp->sda, cpu); spin_lock_init(&ACCESS_PRIVATE(sdp, lock)); rcu_segcblist_init(&sdp->srcu_cblist); sdp->srcu_cblist_invoking = false; - sdp->srcu_gp_seq_needed = sp->srcu_gp_seq; - sdp->srcu_gp_seq_needed_exp = sp->srcu_gp_seq; + sdp->srcu_gp_seq_needed = ssp->srcu_gp_seq; + sdp->srcu_gp_seq_needed_exp = ssp->srcu_gp_seq; sdp->mynode = &snp_first[cpu / levelspread[level]]; for (snp = sdp->mynode; snp != NULL; snp = snp->srcu_parent) { if (snp->grplo < 0) @@ -157,7 +157,7 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) } sdp->cpu = cpu; INIT_DELAYED_WORK(&sdp->work, srcu_invoke_callbacks); - sdp->sp = sp; + sdp->ssp = ssp; sdp->grpmask = 1 << (cpu - sdp->mynode->grplo); if (is_static) continue; @@ -176,35 +176,35 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) * parameter is passed through to init_srcu_struct_nodes(), and * also tells us that ->sda has already been wired up to srcu_data. 
*/ -static int init_srcu_struct_fields(struct srcu_struct *sp, bool is_static) +static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) { - mutex_init(&sp->srcu_cb_mutex); - mutex_init(&sp->srcu_gp_mutex); - sp->srcu_idx = 0; - sp->srcu_gp_seq = 0; - sp->srcu_barrier_seq = 0; - mutex_init(&sp->srcu_barrier_mutex); - atomic_set(&sp->srcu_barrier_cpu_cnt, 0); - INIT_DELAYED_WORK(&sp->work, process_srcu); + mutex_init(&ssp->srcu_cb_mutex); + mutex_init(&ssp->srcu_gp_mutex); + ssp->srcu_idx = 0; + ssp->srcu_gp_seq = 0; + ssp->srcu_barrier_seq = 0; + mutex_init(&ssp->srcu_barrier_mutex); + atomic_set(&ssp->srcu_barrier_cpu_cnt, 0); + INIT_DELAYED_WORK(&ssp->work, process_srcu); if (!is_static) - sp->sda = alloc_percpu(struct srcu_data); - init_srcu_struct_nodes(sp, is_static); - sp->srcu_gp_seq_needed_exp = 0; - sp->srcu_last_gp_end = ktime_get_mono_fast_ns(); - smp_store_release(&sp->srcu_gp_seq_needed, 0); /* Init done. */ - return sp->sda ? 0 : -ENOMEM; + ssp->sda = alloc_percpu(struct srcu_data); + init_srcu_struct_nodes(ssp, is_static); + ssp->srcu_gp_seq_needed_exp = 0; + ssp->srcu_last_gp_end = ktime_get_mono_fast_ns(); + smp_store_release(&ssp->srcu_gp_seq_needed, 0); /* Init done. */ + return ssp->sda ? 0 : -ENOMEM; } #ifdef CONFIG_DEBUG_LOCK_ALLOC -int __init_srcu_struct(struct srcu_struct *sp, const char *name, +int __init_srcu_struct(struct srcu_struct *ssp, const char *name, struct lock_class_key *key) { /* Don't re-initialize a lock while it is held. */ - debug_check_no_locks_freed((void *)sp, sizeof(*sp)); - lockdep_init_map(&sp->dep_map, name, key, 0); - spin_lock_init(&ACCESS_PRIVATE(sp, lock)); - return init_srcu_struct_fields(sp, false); + debug_check_no_locks_freed((void *)ssp, sizeof(*ssp)); + lockdep_init_map(&ssp->dep_map, name, key, 0); + spin_lock_init(&ACCESS_PRIVATE(ssp, lock)); + return init_srcu_struct_fields(ssp, false); } EXPORT_SYMBOL_GPL(__init_srcu_struct); @@ -212,16 +212,16 @@ EXPORT_SYMBOL_GPL(__init_srcu_struct); /** * init_srcu_struct - initialize a sleep-RCU structure - * @sp: structure to initialize. + * @ssp: structure to initialize. * * Must invoke this on a given srcu_struct before passing that srcu_struct * to any other function. Each srcu_struct represents a separate domain * of SRCU protection. */ -int init_srcu_struct(struct srcu_struct *sp) +int init_srcu_struct(struct srcu_struct *ssp) { - spin_lock_init(&ACCESS_PRIVATE(sp, lock)); - return init_srcu_struct_fields(sp, false); + spin_lock_init(&ACCESS_PRIVATE(ssp, lock)); + return init_srcu_struct_fields(ssp, false); } EXPORT_SYMBOL_GPL(init_srcu_struct); @@ -231,37 +231,37 @@ EXPORT_SYMBOL_GPL(init_srcu_struct); * First-use initialization of statically allocated srcu_struct * structure. Wiring up the combining tree is more than can be * done with compile-time initialization, so this check is added - * to each update-side SRCU primitive. Use sp->lock, which -is- + * to each update-side SRCU primitive. Use ssp->lock, which -is- * compile-time initialized, to resolve races involving multiple * CPUs trying to garner first-use privileges. */ -static void check_init_srcu_struct(struct srcu_struct *sp) +static void check_init_srcu_struct(struct srcu_struct *ssp) { unsigned long flags; /* The smp_load_acquire() pairs with the smp_store_release(). */ - if (!rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq_needed))) /*^^^*/ + if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_gp_seq_needed))) /*^^^*/ return; /* Already initialized. 
*/ - spin_lock_irqsave_rcu_node(sp, flags); - if (!rcu_seq_state(sp->srcu_gp_seq_needed)) { - spin_unlock_irqrestore_rcu_node(sp, flags); + spin_lock_irqsave_rcu_node(ssp, flags); + if (!rcu_seq_state(ssp->srcu_gp_seq_needed)) { + spin_unlock_irqrestore_rcu_node(ssp, flags); return; } - init_srcu_struct_fields(sp, true); - spin_unlock_irqrestore_rcu_node(sp, flags); + init_srcu_struct_fields(ssp, true); + spin_unlock_irqrestore_rcu_node(ssp, flags); } /* * Returns approximate total of the readers' ->srcu_lock_count[] values * for the rank of per-CPU counters specified by idx. */ -static unsigned long srcu_readers_lock_idx(struct srcu_struct *sp, int idx) +static unsigned long srcu_readers_lock_idx(struct srcu_struct *ssp, int idx) { int cpu; unsigned long sum = 0; for_each_possible_cpu(cpu) { - struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu); + struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu); sum += READ_ONCE(cpuc->srcu_lock_count[idx]); } @@ -272,13 +272,13 @@ static unsigned long srcu_readers_lock_idx(struct srcu_struct *sp, int idx) * Returns approximate total of the readers' ->srcu_unlock_count[] values * for the rank of per-CPU counters specified by idx. */ -static unsigned long srcu_readers_unlock_idx(struct srcu_struct *sp, int idx) +static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx) { int cpu; unsigned long sum = 0; for_each_possible_cpu(cpu) { - struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu); + struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu); sum += READ_ONCE(cpuc->srcu_unlock_count[idx]); } @@ -289,11 +289,11 @@ static unsigned long srcu_readers_unlock_idx(struct srcu_struct *sp, int idx) * Return true if the number of pre-existing readers is determined to * be zero. */ -static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx) +static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx) { unsigned long unlocks; - unlocks = srcu_readers_unlock_idx(sp, idx); + unlocks = srcu_readers_unlock_idx(ssp, idx); /* * Make sure that a lock is always counted if the corresponding @@ -329,25 +329,25 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx) * of floor(ULONG_MAX/NR_CPUS/2), which should be sufficient, * especially on 64-bit systems. */ - return srcu_readers_lock_idx(sp, idx) == unlocks; + return srcu_readers_lock_idx(ssp, idx) == unlocks; } /** * srcu_readers_active - returns true if there are readers. and false * otherwise - * @sp: which srcu_struct to count active readers (holding srcu_read_lock). + * @ssp: which srcu_struct to count active readers (holding srcu_read_lock). * * Note that this is not an atomic primitive, and can therefore suffer * severe errors when invoked on an active srcu_struct. That said, it * can be useful as an error check at cleanup time. */ -static bool srcu_readers_active(struct srcu_struct *sp) +static bool srcu_readers_active(struct srcu_struct *ssp) { int cpu; unsigned long sum = 0; for_each_possible_cpu(cpu) { - struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu); + struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu); sum += READ_ONCE(cpuc->srcu_lock_count[0]); sum += READ_ONCE(cpuc->srcu_lock_count[1]); @@ -363,44 +363,44 @@ static bool srcu_readers_active(struct srcu_struct *sp) * Return grace-period delay, zero if there are expedited grace * periods pending, SRCU_INTERVAL otherwise. 
*/ -static unsigned long srcu_get_delay(struct srcu_struct *sp) +static unsigned long srcu_get_delay(struct srcu_struct *ssp) { - if (ULONG_CMP_LT(READ_ONCE(sp->srcu_gp_seq), - READ_ONCE(sp->srcu_gp_seq_needed_exp))) + if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), + READ_ONCE(ssp->srcu_gp_seq_needed_exp))) return 0; return SRCU_INTERVAL; } /* Helper for cleanup_srcu_struct() and cleanup_srcu_struct_quiesced(). */ -void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced) +void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced) { int cpu; - if (WARN_ON(!srcu_get_delay(sp))) + if (WARN_ON(!srcu_get_delay(ssp))) return; /* Just leak it! */ - if (WARN_ON(srcu_readers_active(sp))) + if (WARN_ON(srcu_readers_active(ssp))) return; /* Just leak it! */ if (quiesced) { - if (WARN_ON(delayed_work_pending(&sp->work))) + if (WARN_ON(delayed_work_pending(&ssp->work))) return; /* Just leak it! */ } else { - flush_delayed_work(&sp->work); + flush_delayed_work(&ssp->work); } for_each_possible_cpu(cpu) if (quiesced) { - if (WARN_ON(delayed_work_pending(&per_cpu_ptr(sp->sda, cpu)->work))) + if (WARN_ON(delayed_work_pending(&per_cpu_ptr(ssp->sda, cpu)->work))) return; /* Just leak it! */ } else { - flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work); + flush_delayed_work(&per_cpu_ptr(ssp->sda, cpu)->work); } - if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE) || - WARN_ON(srcu_readers_active(sp))) { + if (WARN_ON(rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) != SRCU_STATE_IDLE) || + WARN_ON(srcu_readers_active(ssp))) { pr_info("%s: Active srcu_struct %p state: %d\n", - __func__, sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq))); + __func__, ssp, rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq))); return; /* Caller forgot to stop doing call_srcu()? */ } - free_percpu(sp->sda); - sp->sda = NULL; + free_percpu(ssp->sda); + ssp->sda = NULL; } EXPORT_SYMBOL_GPL(_cleanup_srcu_struct); @@ -409,12 +409,12 @@ EXPORT_SYMBOL_GPL(_cleanup_srcu_struct); * srcu_struct. * Returns an index that must be passed to the matching srcu_read_unlock(). */ -int __srcu_read_lock(struct srcu_struct *sp) +int __srcu_read_lock(struct srcu_struct *ssp) { int idx; - idx = READ_ONCE(sp->srcu_idx) & 0x1; - this_cpu_inc(sp->sda->srcu_lock_count[idx]); + idx = READ_ONCE(ssp->srcu_idx) & 0x1; + this_cpu_inc(ssp->sda->srcu_lock_count[idx]); smp_mb(); /* B */ /* Avoid leaking the critical section. */ return idx; } @@ -425,10 +425,10 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock); * element of the srcu_struct. Note that this may well be a different * CPU than that which was incremented by the corresponding srcu_read_lock(). */ -void __srcu_read_unlock(struct srcu_struct *sp, int idx) +void __srcu_read_unlock(struct srcu_struct *ssp, int idx) { smp_mb(); /* C */ /* Avoid leaking the critical section. */ - this_cpu_inc(sp->sda->srcu_unlock_count[idx]); + this_cpu_inc(ssp->sda->srcu_unlock_count[idx]); } EXPORT_SYMBOL_GPL(__srcu_read_unlock); @@ -444,20 +444,22 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock); /* * Start an SRCU grace period. 
*/ -static void srcu_gp_start(struct srcu_struct *sp) +static void srcu_gp_start(struct srcu_struct *ssp) { - struct srcu_data *sdp = this_cpu_ptr(sp->sda); + struct srcu_data *sdp = this_cpu_ptr(ssp->sda); int state; - lockdep_assert_held(&ACCESS_PRIVATE(sp, lock)); - WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)); + lockdep_assert_held(&ACCESS_PRIVATE(ssp, lock)); + WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)); + spin_lock_rcu_node(sdp); /* Interrupts already disabled. */ rcu_segcblist_advance(&sdp->srcu_cblist, - rcu_seq_current(&sp->srcu_gp_seq)); + rcu_seq_current(&ssp->srcu_gp_seq)); (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, - rcu_seq_snap(&sp->srcu_gp_seq)); + rcu_seq_snap(&ssp->srcu_gp_seq)); + spin_unlock_rcu_node(sdp); /* Interrupts remain disabled. */ smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */ - rcu_seq_start(&sp->srcu_gp_seq); - state = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)); + rcu_seq_start(&ssp->srcu_gp_seq); + state = rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)); WARN_ON_ONCE(state != SRCU_STATE_SCAN1); } @@ -511,7 +513,7 @@ static void srcu_schedule_cbs_sdp(struct srcu_data *sdp, unsigned long delay) * just-completed grace period, the one corresponding to idx. If possible, * schedule this invocation on the corresponding CPUs. */ -static void srcu_schedule_cbs_snp(struct srcu_struct *sp, struct srcu_node *snp, +static void srcu_schedule_cbs_snp(struct srcu_struct *ssp, struct srcu_node *snp, unsigned long mask, unsigned long delay) { int cpu; @@ -519,7 +521,7 @@ static void srcu_schedule_cbs_snp(struct srcu_struct *sp, struct srcu_node *snp, for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) { if (!(mask & (1 << (cpu - snp->grplo)))) continue; - srcu_schedule_cbs_sdp(per_cpu_ptr(sp->sda, cpu), delay); + srcu_schedule_cbs_sdp(per_cpu_ptr(ssp->sda, cpu), delay); } } @@ -532,7 +534,7 @@ static void srcu_schedule_cbs_snp(struct srcu_struct *sp, struct srcu_node *snp, * are initiating callback invocation. This allows the ->srcu_have_cbs[] * array to have a finite number of elements. */ -static void srcu_gp_end(struct srcu_struct *sp) +static void srcu_gp_end(struct srcu_struct *ssp) { unsigned long cbdelay; bool cbs; @@ -546,28 +548,28 @@ static void srcu_gp_end(struct srcu_struct *sp) struct srcu_node *snp; /* Prevent more than one additional grace period. */ - mutex_lock(&sp->srcu_cb_mutex); + mutex_lock(&ssp->srcu_cb_mutex); /* End the current grace period. */ - spin_lock_irq_rcu_node(sp); - idx = rcu_seq_state(sp->srcu_gp_seq); + spin_lock_irq_rcu_node(ssp); + idx = rcu_seq_state(ssp->srcu_gp_seq); WARN_ON_ONCE(idx != SRCU_STATE_SCAN2); - cbdelay = srcu_get_delay(sp); - sp->srcu_last_gp_end = ktime_get_mono_fast_ns(); - rcu_seq_end(&sp->srcu_gp_seq); - gpseq = rcu_seq_current(&sp->srcu_gp_seq); - if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, gpseq)) - sp->srcu_gp_seq_needed_exp = gpseq; - spin_unlock_irq_rcu_node(sp); - mutex_unlock(&sp->srcu_gp_mutex); + cbdelay = srcu_get_delay(ssp); + ssp->srcu_last_gp_end = ktime_get_mono_fast_ns(); + rcu_seq_end(&ssp->srcu_gp_seq); + gpseq = rcu_seq_current(&ssp->srcu_gp_seq); + if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, gpseq)) + ssp->srcu_gp_seq_needed_exp = gpseq; + spin_unlock_irq_rcu_node(ssp); + mutex_unlock(&ssp->srcu_gp_mutex); /* A new grace period can start at this point. But only one. */ /* Initiate callback invocation as needed. 
*/ idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs); - srcu_for_each_node_breadth_first(sp, snp) { + srcu_for_each_node_breadth_first(ssp, snp) { spin_lock_irq_rcu_node(snp); cbs = false; - last_lvl = snp >= sp->level[rcu_num_lvls - 1]; + last_lvl = snp >= ssp->level[rcu_num_lvls - 1]; if (last_lvl) cbs = snp->srcu_have_cbs[idx] == gpseq; snp->srcu_have_cbs[idx] = gpseq; @@ -578,12 +580,12 @@ static void srcu_gp_end(struct srcu_struct *sp) snp->srcu_data_have_cbs[idx] = 0; spin_unlock_irq_rcu_node(snp); if (cbs) - srcu_schedule_cbs_snp(sp, snp, mask, cbdelay); + srcu_schedule_cbs_snp(ssp, snp, mask, cbdelay); /* Occasionally prevent srcu_data counter wrap. */ if (!(gpseq & counter_wrap_check) && last_lvl) for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) { - sdp = per_cpu_ptr(sp->sda, cpu); + sdp = per_cpu_ptr(ssp->sda, cpu); spin_lock_irqsave_rcu_node(sdp, flags); if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed + 100)) @@ -596,18 +598,18 @@ static void srcu_gp_end(struct srcu_struct *sp) } /* Callback initiation done, allow grace periods after next. */ - mutex_unlock(&sp->srcu_cb_mutex); + mutex_unlock(&ssp->srcu_cb_mutex); /* Start a new grace period if needed. */ - spin_lock_irq_rcu_node(sp); - gpseq = rcu_seq_current(&sp->srcu_gp_seq); + spin_lock_irq_rcu_node(ssp); + gpseq = rcu_seq_current(&ssp->srcu_gp_seq); if (!rcu_seq_state(gpseq) && - ULONG_CMP_LT(gpseq, sp->srcu_gp_seq_needed)) { - srcu_gp_start(sp); - spin_unlock_irq_rcu_node(sp); - srcu_reschedule(sp, 0); + ULONG_CMP_LT(gpseq, ssp->srcu_gp_seq_needed)) { + srcu_gp_start(ssp); + spin_unlock_irq_rcu_node(ssp); + srcu_reschedule(ssp, 0); } else { - spin_unlock_irq_rcu_node(sp); + spin_unlock_irq_rcu_node(ssp); } } @@ -618,13 +620,13 @@ static void srcu_gp_end(struct srcu_struct *sp) * but without expediting. To start a completely new grace period, * whether expedited or not, use srcu_funnel_gp_start() instead. */ -static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp, +static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp, unsigned long s) { unsigned long flags; for (; snp != NULL; snp = snp->srcu_parent) { - if (rcu_seq_done(&sp->srcu_gp_seq, s) || + if (rcu_seq_done(&ssp->srcu_gp_seq, s) || ULONG_CMP_GE(READ_ONCE(snp->srcu_gp_seq_needed_exp), s)) return; spin_lock_irqsave_rcu_node(snp, flags); @@ -635,10 +637,10 @@ static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp, WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s); spin_unlock_irqrestore_rcu_node(snp, flags); } - spin_lock_irqsave_rcu_node(sp, flags); - if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s)) - sp->srcu_gp_seq_needed_exp = s; - spin_unlock_irqrestore_rcu_node(sp, flags); + spin_lock_irqsave_rcu_node(ssp, flags); + if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, s)) + ssp->srcu_gp_seq_needed_exp = s; + spin_unlock_irqrestore_rcu_node(ssp, flags); } /* @@ -651,7 +653,7 @@ static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp, * Note that this function also does the work of srcu_funnel_exp_start(), * in some cases by directly invoking it. */ -static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp, +static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp, unsigned long s, bool do_norm) { unsigned long flags; @@ -661,7 +663,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp, /* Each pass through the loop does one level of the srcu_node tree. 
*/ for (; snp != NULL; snp = snp->srcu_parent) { - if (rcu_seq_done(&sp->srcu_gp_seq, s) && snp != sdp->mynode) + if (rcu_seq_done(&ssp->srcu_gp_seq, s) && snp != sdp->mynode) return; /* GP already done and CBs recorded. */ spin_lock_irqsave_rcu_node(snp, flags); if (ULONG_CMP_GE(snp->srcu_have_cbs[idx], s)) { @@ -676,7 +678,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp, return; } if (!do_norm) - srcu_funnel_exp_start(sp, snp, s); + srcu_funnel_exp_start(ssp, snp, s); return; } snp->srcu_have_cbs[idx] = s; @@ -688,29 +690,29 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp, } /* Top of tree, must ensure the grace period will be started. */ - spin_lock_irqsave_rcu_node(sp, flags); - if (ULONG_CMP_LT(sp->srcu_gp_seq_needed, s)) { + spin_lock_irqsave_rcu_node(ssp, flags); + if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed, s)) { /* * Record need for grace period s. Pair with load * acquire setting up for initialization. */ - smp_store_release(&sp->srcu_gp_seq_needed, s); /*^^^*/ + smp_store_release(&ssp->srcu_gp_seq_needed, s); /*^^^*/ } - if (!do_norm && ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s)) - sp->srcu_gp_seq_needed_exp = s; + if (!do_norm && ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, s)) + ssp->srcu_gp_seq_needed_exp = s; /* If grace period not already done and none in progress, start it. */ - if (!rcu_seq_done(&sp->srcu_gp_seq, s) && - rcu_seq_state(sp->srcu_gp_seq) == SRCU_STATE_IDLE) { - WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)); - srcu_gp_start(sp); + if (!rcu_seq_done(&ssp->srcu_gp_seq, s) && + rcu_seq_state(ssp->srcu_gp_seq) == SRCU_STATE_IDLE) { + WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)); + srcu_gp_start(ssp); if (likely(srcu_init_done)) - queue_delayed_work(rcu_gp_wq, &sp->work, - srcu_get_delay(sp)); - else if (list_empty(&sp->work.work.entry)) - list_add(&sp->work.work.entry, &srcu_boot_list); + queue_delayed_work(rcu_gp_wq, &ssp->work, + srcu_get_delay(ssp)); + else if (list_empty(&ssp->work.work.entry)) + list_add(&ssp->work.work.entry, &srcu_boot_list); } - spin_unlock_irqrestore_rcu_node(sp, flags); + spin_unlock_irqrestore_rcu_node(ssp, flags); } /* @@ -718,12 +720,12 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp, * loop an additional time if there is an expedited grace period pending. * The caller must ensure that ->srcu_idx is not changed while checking. */ -static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount) +static bool try_check_zero(struct srcu_struct *ssp, int idx, int trycount) { for (;;) { - if (srcu_readers_active_idx_check(sp, idx)) + if (srcu_readers_active_idx_check(ssp, idx)) return true; - if (--trycount + !srcu_get_delay(sp) <= 0) + if (--trycount + !srcu_get_delay(ssp) <= 0) return false; udelay(SRCU_RETRY_CHECK_DELAY); } @@ -734,7 +736,7 @@ static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount) * use the other rank of the ->srcu_(un)lock_count[] arrays. This allows * us to wait for pre-existing readers in a starvation-free manner. */ -static void srcu_flip(struct srcu_struct *sp) +static void srcu_flip(struct srcu_struct *ssp) { /* * Ensure that if this updater saw a given reader's increment @@ -746,7 +748,7 @@ static void srcu_flip(struct srcu_struct *sp) */ smp_mb(); /* E */ /* Pairs with B and C. 
*/ - WRITE_ONCE(sp->srcu_idx, sp->srcu_idx + 1); + WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1); /* * Ensure that if the updater misses an __srcu_read_unlock() @@ -779,7 +781,7 @@ static void srcu_flip(struct srcu_struct *sp) * negligible when amoritized over that time period, and the extra latency * of a needlessly non-expedited grace period is similarly negligible. */ -static bool srcu_might_be_idle(struct srcu_struct *sp) +static bool srcu_might_be_idle(struct srcu_struct *ssp) { unsigned long curseq; unsigned long flags; @@ -788,7 +790,7 @@ static bool srcu_might_be_idle(struct srcu_struct *sp) /* If the local srcu_data structure has callbacks, not idle. */ local_irq_save(flags); - sdp = this_cpu_ptr(sp->sda); + sdp = this_cpu_ptr(ssp->sda); if (rcu_segcblist_pend_cbs(&sdp->srcu_cblist)) { local_irq_restore(flags); return false; /* Callbacks already present, so not idle. */ @@ -804,17 +806,17 @@ static bool srcu_might_be_idle(struct srcu_struct *sp) /* First, see if enough time has passed since the last GP. */ t = ktime_get_mono_fast_ns(); if (exp_holdoff == 0 || - time_in_range_open(t, sp->srcu_last_gp_end, - sp->srcu_last_gp_end + exp_holdoff)) + time_in_range_open(t, ssp->srcu_last_gp_end, + ssp->srcu_last_gp_end + exp_holdoff)) return false; /* Too soon after last GP. */ /* Next, check for probable idleness. */ - curseq = rcu_seq_current(&sp->srcu_gp_seq); + curseq = rcu_seq_current(&ssp->srcu_gp_seq); smp_mb(); /* Order ->srcu_gp_seq with ->srcu_gp_seq_needed. */ - if (ULONG_CMP_LT(curseq, READ_ONCE(sp->srcu_gp_seq_needed))) + if (ULONG_CMP_LT(curseq, READ_ONCE(ssp->srcu_gp_seq_needed))) return false; /* Grace period in progress, so not idle. */ smp_mb(); /* Order ->srcu_gp_seq with prior access. */ - if (curseq != rcu_seq_current(&sp->srcu_gp_seq)) + if (curseq != rcu_seq_current(&ssp->srcu_gp_seq)) return false; /* GP # changed, so not idle. */ return true; /* With reasonable probability, idle! */ } @@ -854,16 +856,17 @@ static void srcu_leak_callback(struct rcu_head *rhp) * srcu_read_lock(), and srcu_read_unlock() that are all passed the same * srcu_struct structure. */ -void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, +void __call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp, rcu_callback_t func, bool do_norm) { unsigned long flags; + int idx; bool needexp = false; bool needgp = false; unsigned long s; struct srcu_data *sdp; - check_init_srcu_struct(sp); + check_init_srcu_struct(ssp); if (debug_rcu_head_queue(rhp)) { /* Probable double call_srcu(), so leak the callback. 
*/ WRITE_ONCE(rhp->func, srcu_leak_callback); @@ -871,13 +874,14 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, return; } rhp->func = func; + idx = srcu_read_lock(ssp); local_irq_save(flags); - sdp = this_cpu_ptr(sp->sda); + sdp = this_cpu_ptr(ssp->sda); spin_lock_rcu_node(sdp); rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp, false); rcu_segcblist_advance(&sdp->srcu_cblist, - rcu_seq_current(&sp->srcu_gp_seq)); - s = rcu_seq_snap(&sp->srcu_gp_seq); + rcu_seq_current(&ssp->srcu_gp_seq)); + s = rcu_seq_snap(&ssp->srcu_gp_seq); (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, s); if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) { sdp->srcu_gp_seq_needed = s; @@ -889,14 +893,15 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, } spin_unlock_irqrestore_rcu_node(sdp, flags); if (needgp) - srcu_funnel_gp_start(sp, sdp, s, do_norm); + srcu_funnel_gp_start(ssp, sdp, s, do_norm); else if (needexp) - srcu_funnel_exp_start(sp, sdp->mynode, s); + srcu_funnel_exp_start(ssp, sdp->mynode, s); + srcu_read_unlock(ssp, idx); } /** * call_srcu() - Queue a callback for invocation after an SRCU grace period - * @sp: srcu_struct in queue the callback + * @ssp: srcu_struct in queue the callback * @rhp: structure to be used for queueing the SRCU callback. * @func: function to be invoked after the SRCU grace period * @@ -911,21 +916,21 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, * The callback will be invoked from process context, but must nevertheless * be fast and must not block. */ -void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, +void call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp, rcu_callback_t func) { - __call_srcu(sp, rhp, func, true); + __call_srcu(ssp, rhp, func, true); } EXPORT_SYMBOL_GPL(call_srcu); /* * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). */ -static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm) +static void __synchronize_srcu(struct srcu_struct *ssp, bool do_norm) { struct rcu_synchronize rcu; - RCU_LOCKDEP_WARN(lock_is_held(&sp->dep_map) || + RCU_LOCKDEP_WARN(lock_is_held(&ssp->dep_map) || lock_is_held(&rcu_bh_lock_map) || lock_is_held(&rcu_lock_map) || lock_is_held(&rcu_sched_lock_map), @@ -934,10 +939,10 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm) if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE) return; might_sleep(); - check_init_srcu_struct(sp); + check_init_srcu_struct(ssp); init_completion(&rcu.completion); init_rcu_head_on_stack(&rcu.head); - __call_srcu(sp, &rcu.head, wakeme_after_rcu, do_norm); + __call_srcu(ssp, &rcu.head, wakeme_after_rcu, do_norm); wait_for_completion(&rcu.completion); destroy_rcu_head_on_stack(&rcu.head); @@ -953,7 +958,7 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm) /** * synchronize_srcu_expedited - Brute-force SRCU grace period - * @sp: srcu_struct with which to synchronize. + * @ssp: srcu_struct with which to synchronize. * * Wait for an SRCU grace period to elapse, but be more aggressive about * spinning rather than blocking when waiting. @@ -961,15 +966,15 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm) * Note that synchronize_srcu_expedited() has the same deadlock and * memory-ordering properties as does synchronize_srcu(). 
*/ -void synchronize_srcu_expedited(struct srcu_struct *sp) +void synchronize_srcu_expedited(struct srcu_struct *ssp) { - __synchronize_srcu(sp, rcu_gp_is_normal()); + __synchronize_srcu(ssp, rcu_gp_is_normal()); } EXPORT_SYMBOL_GPL(synchronize_srcu_expedited); /** * synchronize_srcu - wait for prior SRCU read-side critical-section completion - * @sp: srcu_struct with which to synchronize. + * @ssp: srcu_struct with which to synchronize. * * Wait for the count to drain to zero of both indexes. To avoid the * possible starvation of synchronize_srcu(), it waits for the count of @@ -1011,12 +1016,12 @@ EXPORT_SYMBOL_GPL(synchronize_srcu_expedited); * SRCU must also provide it. Note that detecting idleness is heuristic * and subject to both false positives and negatives. */ -void synchronize_srcu(struct srcu_struct *sp) +void synchronize_srcu(struct srcu_struct *ssp) { - if (srcu_might_be_idle(sp) || rcu_gp_is_expedited()) - synchronize_srcu_expedited(sp); + if (srcu_might_be_idle(ssp) || rcu_gp_is_expedited()) + synchronize_srcu_expedited(ssp); else - __synchronize_srcu(sp, true); + __synchronize_srcu(ssp, true); } EXPORT_SYMBOL_GPL(synchronize_srcu); @@ -1026,36 +1031,36 @@ EXPORT_SYMBOL_GPL(synchronize_srcu); static void srcu_barrier_cb(struct rcu_head *rhp) { struct srcu_data *sdp; - struct srcu_struct *sp; + struct srcu_struct *ssp; sdp = container_of(rhp, struct srcu_data, srcu_barrier_head); - sp = sdp->sp; - if (atomic_dec_and_test(&sp->srcu_barrier_cpu_cnt)) - complete(&sp->srcu_barrier_completion); + ssp = sdp->ssp; + if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt)) + complete(&ssp->srcu_barrier_completion); } /** * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete. - * @sp: srcu_struct on which to wait for in-flight callbacks. + * @ssp: srcu_struct on which to wait for in-flight callbacks. */ -void srcu_barrier(struct srcu_struct *sp) +void srcu_barrier(struct srcu_struct *ssp) { int cpu; struct srcu_data *sdp; - unsigned long s = rcu_seq_snap(&sp->srcu_barrier_seq); + unsigned long s = rcu_seq_snap(&ssp->srcu_barrier_seq); - check_init_srcu_struct(sp); - mutex_lock(&sp->srcu_barrier_mutex); - if (rcu_seq_done(&sp->srcu_barrier_seq, s)) { + check_init_srcu_struct(ssp); + mutex_lock(&ssp->srcu_barrier_mutex); + if (rcu_seq_done(&ssp->srcu_barrier_seq, s)) { smp_mb(); /* Force ordering following return. */ - mutex_unlock(&sp->srcu_barrier_mutex); + mutex_unlock(&ssp->srcu_barrier_mutex); return; /* Someone else did our work for us. */ } - rcu_seq_start(&sp->srcu_barrier_seq); - init_completion(&sp->srcu_barrier_completion); + rcu_seq_start(&ssp->srcu_barrier_seq); + init_completion(&ssp->srcu_barrier_completion); /* Initial count prevents reaching zero until all CBs are posted. */ - atomic_set(&sp->srcu_barrier_cpu_cnt, 1); + atomic_set(&ssp->srcu_barrier_cpu_cnt, 1); /* * Each pass through this loop enqueues a callback, but only @@ -1066,39 +1071,39 @@ void srcu_barrier(struct srcu_struct *sp) * grace period as the last callback already in the queue. 
*/ for_each_possible_cpu(cpu) { - sdp = per_cpu_ptr(sp->sda, cpu); + sdp = per_cpu_ptr(ssp->sda, cpu); spin_lock_irq_rcu_node(sdp); - atomic_inc(&sp->srcu_barrier_cpu_cnt); + atomic_inc(&ssp->srcu_barrier_cpu_cnt); sdp->srcu_barrier_head.func = srcu_barrier_cb; debug_rcu_head_queue(&sdp->srcu_barrier_head); if (!rcu_segcblist_entrain(&sdp->srcu_cblist, &sdp->srcu_barrier_head, 0)) { debug_rcu_head_unqueue(&sdp->srcu_barrier_head); - atomic_dec(&sp->srcu_barrier_cpu_cnt); + atomic_dec(&ssp->srcu_barrier_cpu_cnt); } spin_unlock_irq_rcu_node(sdp); } /* Remove the initial count, at which point reaching zero can happen. */ - if (atomic_dec_and_test(&sp->srcu_barrier_cpu_cnt)) - complete(&sp->srcu_barrier_completion); - wait_for_completion(&sp->srcu_barrier_completion); + if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt)) + complete(&ssp->srcu_barrier_completion); + wait_for_completion(&ssp->srcu_barrier_completion); - rcu_seq_end(&sp->srcu_barrier_seq); - mutex_unlock(&sp->srcu_barrier_mutex); + rcu_seq_end(&ssp->srcu_barrier_seq); + mutex_unlock(&ssp->srcu_barrier_mutex); } EXPORT_SYMBOL_GPL(srcu_barrier); /** * srcu_batches_completed - return batches completed. - * @sp: srcu_struct on which to report batch completion. + * @ssp: srcu_struct on which to report batch completion. * * Report the number of batches, correlated with, but not necessarily * precisely the same as, the number of grace periods that have elapsed. */ -unsigned long srcu_batches_completed(struct srcu_struct *sp) +unsigned long srcu_batches_completed(struct srcu_struct *ssp) { - return sp->srcu_idx; + return ssp->srcu_idx; } EXPORT_SYMBOL_GPL(srcu_batches_completed); @@ -1107,11 +1112,11 @@ EXPORT_SYMBOL_GPL(srcu_batches_completed); * to SRCU_STATE_SCAN2, and invoke srcu_gp_end() when scan has * completed in that state. */ -static void srcu_advance_state(struct srcu_struct *sp) +static void srcu_advance_state(struct srcu_struct *ssp) { int idx; - mutex_lock(&sp->srcu_gp_mutex); + mutex_lock(&ssp->srcu_gp_mutex); /* * Because readers might be delayed for an extended period after @@ -1123,47 +1128,47 @@ static void srcu_advance_state(struct srcu_struct *sp) * The load-acquire ensures that we see the accesses performed * by the prior grace period. */ - idx = rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq)); /* ^^^ */ + idx = rcu_seq_state(smp_load_acquire(&ssp->srcu_gp_seq)); /* ^^^ */ if (idx == SRCU_STATE_IDLE) { - spin_lock_irq_rcu_node(sp); - if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) { - WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq)); - spin_unlock_irq_rcu_node(sp); - mutex_unlock(&sp->srcu_gp_mutex); + spin_lock_irq_rcu_node(ssp); + if (ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)) { + WARN_ON_ONCE(rcu_seq_state(ssp->srcu_gp_seq)); + spin_unlock_irq_rcu_node(ssp); + mutex_unlock(&ssp->srcu_gp_mutex); return; } - idx = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)); + idx = rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)); if (idx == SRCU_STATE_IDLE) - srcu_gp_start(sp); - spin_unlock_irq_rcu_node(sp); + srcu_gp_start(ssp); + spin_unlock_irq_rcu_node(ssp); if (idx != SRCU_STATE_IDLE) { - mutex_unlock(&sp->srcu_gp_mutex); + mutex_unlock(&ssp->srcu_gp_mutex); return; /* Someone else started the grace period. 
*/ } } - if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN1) { - idx = 1 ^ (sp->srcu_idx & 1); - if (!try_check_zero(sp, idx, 1)) { - mutex_unlock(&sp->srcu_gp_mutex); + if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) == SRCU_STATE_SCAN1) { + idx = 1 ^ (ssp->srcu_idx & 1); + if (!try_check_zero(ssp, idx, 1)) { + mutex_unlock(&ssp->srcu_gp_mutex); return; /* readers present, retry later. */ } - srcu_flip(sp); - rcu_seq_set_state(&sp->srcu_gp_seq, SRCU_STATE_SCAN2); + srcu_flip(ssp); + rcu_seq_set_state(&ssp->srcu_gp_seq, SRCU_STATE_SCAN2); } - if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN2) { + if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) == SRCU_STATE_SCAN2) { /* * SRCU read-side critical sections are normally short, * so check at least twice in quick succession after a flip. */ - idx = 1 ^ (sp->srcu_idx & 1); - if (!try_check_zero(sp, idx, 2)) { - mutex_unlock(&sp->srcu_gp_mutex); + idx = 1 ^ (ssp->srcu_idx & 1); + if (!try_check_zero(ssp, idx, 2)) { + mutex_unlock(&ssp->srcu_gp_mutex); return; /* readers present, retry later. */ } - srcu_gp_end(sp); /* Releases ->srcu_gp_mutex. */ + srcu_gp_end(ssp); /* Releases ->srcu_gp_mutex. */ } } @@ -1179,14 +1184,14 @@ static void srcu_invoke_callbacks(struct work_struct *work) struct rcu_cblist ready_cbs; struct rcu_head *rhp; struct srcu_data *sdp; - struct srcu_struct *sp; + struct srcu_struct *ssp; sdp = container_of(work, struct srcu_data, work.work); - sp = sdp->sp; + ssp = sdp->ssp; rcu_cblist_init(&ready_cbs); spin_lock_irq_rcu_node(sdp); rcu_segcblist_advance(&sdp->srcu_cblist, - rcu_seq_current(&sp->srcu_gp_seq)); + rcu_seq_current(&ssp->srcu_gp_seq)); if (sdp->srcu_cblist_invoking || !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) { spin_unlock_irq_rcu_node(sdp); @@ -1212,7 +1217,7 @@ static void srcu_invoke_callbacks(struct work_struct *work) spin_lock_irq_rcu_node(sdp); rcu_segcblist_insert_count(&sdp->srcu_cblist, &ready_cbs); (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, - rcu_seq_snap(&sp->srcu_gp_seq)); + rcu_seq_snap(&ssp->srcu_gp_seq)); sdp->srcu_cblist_invoking = false; more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist); spin_unlock_irq_rcu_node(sdp); @@ -1224,24 +1229,24 @@ static void srcu_invoke_callbacks(struct work_struct *work) * Finished one round of SRCU grace period. Start another if there are * more SRCU callbacks queued, otherwise put SRCU into not-running state. */ -static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay) +static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay) { bool pushgp = true; - spin_lock_irq_rcu_node(sp); - if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) { - if (!WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq))) { + spin_lock_irq_rcu_node(ssp); + if (ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)) { + if (!WARN_ON_ONCE(rcu_seq_state(ssp->srcu_gp_seq))) { /* All requests fulfilled, time to go idle. */ pushgp = false; } - } else if (!rcu_seq_state(sp->srcu_gp_seq)) { + } else if (!rcu_seq_state(ssp->srcu_gp_seq)) { /* Outstanding request and no GP. Start one. 
*/ - srcu_gp_start(sp); + srcu_gp_start(ssp); } - spin_unlock_irq_rcu_node(sp); + spin_unlock_irq_rcu_node(ssp); if (pushgp) - queue_delayed_work(rcu_gp_wq, &sp->work, delay); + queue_delayed_work(rcu_gp_wq, &ssp->work, delay); } /* @@ -1249,41 +1254,41 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay) */ static void process_srcu(struct work_struct *work) { - struct srcu_struct *sp; + struct srcu_struct *ssp; - sp = container_of(work, struct srcu_struct, work.work); + ssp = container_of(work, struct srcu_struct, work.work); - srcu_advance_state(sp); - srcu_reschedule(sp, srcu_get_delay(sp)); + srcu_advance_state(ssp); + srcu_reschedule(ssp, srcu_get_delay(ssp)); } void srcutorture_get_gp_data(enum rcutorture_type test_type, - struct srcu_struct *sp, int *flags, + struct srcu_struct *ssp, int *flags, unsigned long *gp_seq) { if (test_type != SRCU_FLAVOR) return; *flags = 0; - *gp_seq = rcu_seq_current(&sp->srcu_gp_seq); + *gp_seq = rcu_seq_current(&ssp->srcu_gp_seq); } EXPORT_SYMBOL_GPL(srcutorture_get_gp_data); -void srcu_torture_stats_print(struct srcu_struct *sp, char *tt, char *tf) +void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf) { int cpu; int idx; unsigned long s0 = 0, s1 = 0; - idx = sp->srcu_idx & 0x1; + idx = ssp->srcu_idx & 0x1; pr_alert("%s%s Tree SRCU g%ld per-CPU(idx=%d):", - tt, tf, rcu_seq_current(&sp->srcu_gp_seq), idx); + tt, tf, rcu_seq_current(&ssp->srcu_gp_seq), idx); for_each_possible_cpu(cpu) { unsigned long l0, l1; unsigned long u0, u1; long c0, c1; struct srcu_data *sdp; - sdp = per_cpu_ptr(sp->sda, cpu); + sdp = per_cpu_ptr(ssp->sda, cpu); u0 = sdp->srcu_unlock_count[!idx]; u1 = sdp->srcu_unlock_count[idx]; @@ -1318,14 +1323,14 @@ early_initcall(srcu_bootup_announce); void __init srcu_init(void) { - struct srcu_struct *sp; + struct srcu_struct *ssp; srcu_init_done = true; while (!list_empty(&srcu_boot_list)) { - sp = list_first_entry(&srcu_boot_list, struct srcu_struct, + ssp = list_first_entry(&srcu_boot_list, struct srcu_struct, work.work.entry); - check_init_srcu_struct(sp); - list_del_init(&sp->work.work.entry); - queue_work(rcu_gp_wq, &sp->work.work); + check_init_srcu_struct(ssp); + list_del_init(&ssp->work.work.entry); + queue_work(rcu_gp_wq, &ssp->work.work); } } diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c index 3f943efcf61c..be10036fa621 100644 --- a/kernel/rcu/sync.c +++ b/kernel/rcu/sync.c @@ -44,15 +44,15 @@ static const struct { __INIT_HELD(rcu_read_lock_held) }, [RCU_SCHED_SYNC] = { - .sync = synchronize_sched, - .call = call_rcu_sched, - .wait = rcu_barrier_sched, + .sync = synchronize_rcu, + .call = call_rcu, + .wait = rcu_barrier, __INIT_HELD(rcu_read_lock_sched_held) }, [RCU_BH_SYNC] = { - .sync = synchronize_rcu_bh, - .call = call_rcu_bh, - .wait = rcu_barrier_bh, + .sync = synchronize_rcu, + .call = call_rcu, + .wait = rcu_barrier, __INIT_HELD(rcu_read_lock_bh_held) }, }; @@ -125,8 +125,7 @@ void rcu_sync_enter(struct rcu_sync *rsp) rsp->gp_state = GP_PENDING; spin_unlock_irq(&rsp->rss_lock); - BUG_ON(need_wait && need_sync); - + WARN_ON_ONCE(need_wait && need_sync); if (need_sync) { gp_ops[rsp->gp_type].sync(); rsp->gp_state = GP_PASSED; @@ -139,7 +138,7 @@ void rcu_sync_enter(struct rcu_sync *rsp) * Nobody has yet been allowed the 'fast' path and thus we can * avoid doing any sync(). The callback will get 'dropped'. 
*/ - BUG_ON(rsp->gp_state != GP_PASSED); + WARN_ON_ONCE(rsp->gp_state != GP_PASSED); } } @@ -166,8 +165,8 @@ static void rcu_sync_func(struct rcu_head *rhp) struct rcu_sync *rsp = container_of(rhp, struct rcu_sync, cb_head); unsigned long flags; - BUG_ON(rsp->gp_state != GP_PASSED); - BUG_ON(rsp->cb_state == CB_IDLE); + WARN_ON_ONCE(rsp->gp_state != GP_PASSED); + WARN_ON_ONCE(rsp->cb_state == CB_IDLE); spin_lock_irqsave(&rsp->rss_lock, flags); if (rsp->gp_count) { @@ -225,7 +224,7 @@ void rcu_sync_dtor(struct rcu_sync *rsp) { int cb_state; - BUG_ON(rsp->gp_count); + WARN_ON_ONCE(rsp->gp_count); spin_lock_irq(&rsp->rss_lock); if (rsp->cb_state == CB_REPLAY) @@ -235,6 +234,6 @@ void rcu_sync_dtor(struct rcu_sync *rsp) if (cb_state != CB_IDLE) { gp_ops[rsp->gp_type].wait(); - BUG_ON(rsp->cb_state != CB_IDLE); + WARN_ON_ONCE(rsp->cb_state != CB_IDLE); } } diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 121f833acd04..6ec3abbe90e2 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -500,16 +500,29 @@ void rcu_force_quiescent_state(void) EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); /* + * Convert a ->gp_state value to a character string. + */ +static const char *gp_state_getname(short gs) +{ + if (gs < 0 || gs >= ARRAY_SIZE(gp_state_names)) + return "???"; + return gp_state_names[gs]; +} + +/* * Show the state of the grace-period kthreads. */ void show_rcu_gp_kthreads(void) { int cpu; + unsigned long j; struct rcu_data *rdp; struct rcu_node *rnp; - pr_info("%s: wait state: %d ->state: %#lx\n", rcu_state.name, - rcu_state.gp_state, rcu_state.gp_kthread->state); + j = jiffies - READ_ONCE(rcu_state.gp_activity); + pr_info("%s: wait state: %s(%d) ->state: %#lx delta ->gp_activity %ld\n", + rcu_state.name, gp_state_getname(rcu_state.gp_state), + rcu_state.gp_state, rcu_state.gp_kthread->state, j); rcu_for_each_node_breadth_first(rnp) { if (ULONG_CMP_GE(rcu_state.gp_seq, rnp->gp_seq_needed)) continue; @@ -891,12 +904,12 @@ void rcu_irq_enter_irqson(void) } /** - * rcu_is_watching - see if RCU thinks that the current CPU is idle + * rcu_is_watching - see if RCU thinks that the current CPU is not idle * * Return true if RCU is watching the running CPU, which means that this * CPU can safely enter RCU read-side critical sections. In other words, - * if the current CPU is in its idle loop and is neither in an interrupt - * or NMI handler, return true. + * if the current CPU is not in its idle loop or is in an interrupt or + * NMI handler, return true. */ bool notrace rcu_is_watching(void) { @@ -1143,16 +1156,6 @@ static void record_gp_stall_check_time(void) } /* - * Convert a ->gp_state value to a character string. - */ -static const char *gp_state_getname(short gs) -{ - if (gs < 0 || gs >= ARRAY_SIZE(gp_state_names)) - return "???"; - return gp_state_names[gs]; -} - -/* * Complain about starvation of grace-period kthread. */ static void rcu_check_gp_kthread_starvation(void) @@ -2032,9 +2035,9 @@ static void rcu_gp_cleanup(void) rnp = rcu_get_root(); raw_spin_lock_irq_rcu_node(rnp); /* GP before ->gp_seq update. */ - /* Declare grace period done. */ - rcu_seq_end(&rcu_state.gp_seq); + /* Declare grace period done, trace first to use old GP number. */ trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("end")); + rcu_seq_end(&rcu_state.gp_seq); rcu_state.gp_state = RCU_GP_IDLE; /* Check for GP requests since above loop. 
*/ rdp = this_cpu_ptr(&rcu_data); @@ -2600,10 +2603,10 @@ static void force_quiescent_state(void) * This function checks for grace-period requests that fail to motivate * RCU to come out of its idle mode. */ -static void -rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp) +void +rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp, + const unsigned long gpssdelay) { - const unsigned long gpssdelay = rcu_jiffies_till_stall_check() * HZ; unsigned long flags; unsigned long j; struct rcu_node *rnp_root = rcu_get_root(); @@ -2690,7 +2693,7 @@ static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused local_irq_restore(flags); } - rcu_check_gp_start_stall(rnp, rdp); + rcu_check_gp_start_stall(rnp, rdp, rcu_jiffies_till_stall_check()); /* If there are callbacks ready, invoke them. */ if (rcu_segcblist_ready_cbs(&rdp->cblist)) @@ -2826,7 +2829,7 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func, int cpu, bool lazy) * Very early boot, before rcu_init(). Initialize if needed * and then drop through to queue the callback. */ - BUG_ON(cpu != -1); + WARN_ON_ONCE(cpu != -1); WARN_ON_ONCE(!rcu_is_watching()); if (rcu_segcblist_empty(&rdp->cblist)) rcu_segcblist_init(&rdp->cblist); @@ -3485,7 +3488,8 @@ static int __init rcu_spawn_gp_kthread(void) rcu_scheduler_fully_active = 1; t = kthread_create(rcu_gp_kthread, NULL, "%s", rcu_state.name); - BUG_ON(IS_ERR(t)); + if (WARN_ONCE(IS_ERR(t), "%s: Could not start grace-period kthread, OOM is now expected behavior\n", __func__)) + return 0; rnp = rcu_get_root(); raw_spin_lock_irqsave_rcu_node(rnp, flags); rcu_state.gp_kthread = t; diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 703e19ff532d..c3e2807a834a 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -57,7 +57,7 @@ struct rcu_node { /* some rcu_state fields as well as */ /* following. */ unsigned long gp_seq; /* Track rsp->rcu_gp_seq. */ - unsigned long gp_seq_needed; /* Track rsp->rcu_gp_seq_needed. */ + unsigned long gp_seq_needed; /* Track furthest future GP request. */ unsigned long completedqs; /* All QSes done for this node. */ unsigned long qsmask; /* CPUs or groups that need to switch in */ /* order for current grace period to proceed.*/ @@ -163,7 +163,7 @@ union rcu_noqs { struct rcu_data { /* 1) quiescent-state and grace-period handling : */ unsigned long gp_seq; /* Track rsp->rcu_gp_seq counter. */ - unsigned long gp_seq_needed; /* Track rsp->rcu_gp_seq_needed ctr. */ + unsigned long gp_seq_needed; /* Track furthest future GP request. */ union rcu_noqs cpu_no_qs; /* No QSes yet for this CPU. */ bool core_needs_qs; /* Core waits for quiesc state. */ bool beenonline; /* CPU online at least once. 
*/ @@ -398,17 +398,6 @@ static const char *tp_rcu_varname __used __tracepoint_string = rcu_name; #define RCU_NAME rcu_name #endif /* #else #ifdef CONFIG_TRACING */ -/* - * RCU implementation internal declarations: - */ -extern struct rcu_state rcu_sched_state; - -extern struct rcu_state rcu_bh_state; - -#ifdef CONFIG_PREEMPT_RCU -extern struct rcu_state rcu_preempt_state; -#endif /* #ifdef CONFIG_PREEMPT_RCU */ - int rcu_dynticks_snap(struct rcu_data *rdp); #ifdef CONFIG_RCU_BOOST diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 8d18c1014e2b..928fe5893a57 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -450,10 +450,12 @@ static void sync_rcu_exp_select_cpus(smp_call_func_t func) } INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus); preempt_disable(); - cpu = cpumask_next(rnp->grplo - 1, cpu_online_mask); + cpu = find_next_bit(&rnp->ffmask, BITS_PER_LONG, -1); /* If all offline, queue the work on an unbound CPU. */ - if (unlikely(cpu > rnp->grphi)) + if (unlikely(cpu > rnp->grphi - rnp->grplo)) cpu = WORK_CPU_UNBOUND; + else + cpu += rnp->grplo; queue_work_on(cpu, rcu_par_gp_wq, &rnp->rew.rew_work); preempt_enable(); rnp->exp_need_flush = true; @@ -690,8 +692,10 @@ static void sync_rcu_exp_handler(void *unused) */ if (t->rcu_read_lock_nesting > 0) { raw_spin_lock_irqsave_rcu_node(rnp, flags); - if (rnp->expmask & rdp->grpmask) + if (rnp->expmask & rdp->grpmask) { rdp->deferred_qs = true; + WRITE_ONCE(t->rcu_read_unlock_special.b.exp_hint, true); + } raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 05915e536336..605ff3b06098 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -397,6 +397,11 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) return rnp->gp_tasks != NULL; } +/* Bias and limit values for ->rcu_read_lock_nesting. */ +#define RCU_NEST_BIAS INT_MAX +#define RCU_NEST_NMAX (-INT_MAX / 2) +#define RCU_NEST_PMAX (INT_MAX / 2) + /* * Preemptible RCU implementation for rcu_read_lock(). * Just increment ->rcu_read_lock_nesting, shared state will be updated @@ -405,6 +410,8 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) void __rcu_read_lock(void) { current->rcu_read_lock_nesting++; + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) + WARN_ON_ONCE(current->rcu_read_lock_nesting > RCU_NEST_PMAX); barrier(); /* critical section after entry code. */ } EXPORT_SYMBOL_GPL(__rcu_read_lock); @@ -424,20 +431,18 @@ void __rcu_read_unlock(void) --t->rcu_read_lock_nesting; } else { barrier(); /* critical section before exit code. 
*/ - t->rcu_read_lock_nesting = INT_MIN; + t->rcu_read_lock_nesting = -RCU_NEST_BIAS; barrier(); /* assign before ->rcu_read_unlock_special load */ if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s))) rcu_read_unlock_special(t); barrier(); /* ->rcu_read_unlock_special load before assign */ t->rcu_read_lock_nesting = 0; } -#ifdef CONFIG_PROVE_LOCKING - { - int rrln = READ_ONCE(t->rcu_read_lock_nesting); + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { + int rrln = t->rcu_read_lock_nesting; - WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2); + WARN_ON_ONCE(rrln < 0 && rrln > RCU_NEST_NMAX); } -#endif /* #ifdef CONFIG_PROVE_LOCKING */ } EXPORT_SYMBOL_GPL(__rcu_read_unlock); @@ -597,7 +602,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags) */ static bool rcu_preempt_need_deferred_qs(struct task_struct *t) { - return (this_cpu_ptr(&rcu_data)->deferred_qs || + return (__this_cpu_read(rcu_data.deferred_qs) || READ_ONCE(t->rcu_read_unlock_special.s)) && t->rcu_read_lock_nesting <= 0; } @@ -617,11 +622,11 @@ static void rcu_preempt_deferred_qs(struct task_struct *t) if (!rcu_preempt_need_deferred_qs(t)) return; if (couldrecurse) - t->rcu_read_lock_nesting -= INT_MIN; + t->rcu_read_lock_nesting -= RCU_NEST_BIAS; local_irq_save(flags); rcu_preempt_deferred_qs_irqrestore(t, flags); if (couldrecurse) - t->rcu_read_lock_nesting += INT_MIN; + t->rcu_read_lock_nesting += RCU_NEST_BIAS; } /* @@ -642,13 +647,21 @@ static void rcu_read_unlock_special(struct task_struct *t) local_irq_save(flags); irqs_were_disabled = irqs_disabled_flags(flags); - if ((preempt_bh_were_disabled || irqs_were_disabled) && - t->rcu_read_unlock_special.b.blocked) { + if (preempt_bh_were_disabled || irqs_were_disabled) { + WRITE_ONCE(t->rcu_read_unlock_special.b.exp_hint, false); /* Need to defer quiescent state until everything is enabled. */ - raise_softirq_irqoff(RCU_SOFTIRQ); + if (irqs_were_disabled) { + /* Enabling irqs does not reschedule, so... */ + raise_softirq_irqoff(RCU_SOFTIRQ); + } else { + /* Enabling BH or preempt does reschedule, so... */ + set_tsk_need_resched(current); + set_preempt_need_resched(); + } local_irq_restore(flags); return; } + WRITE_ONCE(t->rcu_read_unlock_special.b.exp_hint, false); rcu_preempt_deferred_qs_irqrestore(t, flags); } @@ -1464,7 +1477,8 @@ static void __init rcu_spawn_boost_kthreads(void) for_each_possible_cpu(cpu) per_cpu(rcu_cpu_has_work, cpu) = 0; - BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); + if (WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__)) + return; rcu_for_each_leaf_node(rnp) (void)rcu_spawn_one_boost_kthread(rnp); } @@ -2322,7 +2336,8 @@ static int rcu_nocb_kthread(void *arg) tail = rdp->nocb_follower_tail; rdp->nocb_follower_tail = &rdp->nocb_follower_head; raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); - BUG_ON(!list); + if (WARN_ON_ONCE(!list)) + continue; trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeNonEmpty")); /* Each pass through the following loop invokes a callback. */ @@ -2495,7 +2510,8 @@ static void rcu_spawn_one_nocb_kthread(int cpu) /* Spawn the kthread for this CPU. 
*/ t = kthread_run(rcu_nocb_kthread, rdp_spawn, "rcuo%c/%d", rcu_state.abbr, cpu); - BUG_ON(IS_ERR(t)); + if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo kthread, OOM is now expected behavior\n", __func__)) + return; WRITE_ONCE(rdp_spawn->nocb_kthread, t); } diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index f203b94f6b5b..1971869c4072 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -335,8 +335,7 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array, /* Initialize and register callbacks for each crcu_array element. */ for (i = 0; i < n; i++) { if (checktiny && - (crcu_array[i] == call_rcu || - crcu_array[i] == call_rcu_bh)) { + (crcu_array[i] == call_rcu)) { might_sleep(); continue; } @@ -352,8 +351,7 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array, /* Wait for all callbacks to be invoked. */ for (i = 0; i < n; i++) { if (checktiny && - (crcu_array[i] == call_rcu || - crcu_array[i] == call_rcu_bh)) + (crcu_array[i] == call_rcu)) continue; for (j = 0; j < i; j++) if (crcu_array[j] == crcu_array[i]) @@ -822,7 +820,8 @@ static int __init rcu_spawn_tasks_kthread(void) struct task_struct *t; t = kthread_run(rcu_tasks_kthread, NULL, "rcu_tasks_kthread"); - BUG_ON(IS_ERR(t)); + if (WARN_ONCE(IS_ERR(t), "%s: Could not start Tasks-RCU grace-period kthread, OOM is now expected behavior\n", __func__)) + return 0; smp_mb(); /* Ensure others see full kthread. */ WRITE_ONCE(rcu_tasks_kthread_ptr, t); return 0; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f12225f26b70..ea12ebc57840 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5788,7 +5788,7 @@ int sched_cpu_deactivate(unsigned int cpu) * * Do sync before park smpboot threads to take care the rcu boost case. */ - synchronize_rcu_mult(call_rcu, call_rcu_sched); + synchronize_rcu(); if (!sched_smp_initialized) return 0; diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c index 76e0eaf4654e..3cd8a3a795d2 100644 --- a/kernel/sched/membarrier.c +++ b/kernel/sched/membarrier.c @@ -210,7 +210,7 @@ static int membarrier_register_global_expedited(void) * future scheduler executions will observe the new * thread flag state for this mm. */ - synchronize_sched(); + synchronize_rcu(); } atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY, &mm->membarrier_state); @@ -246,7 +246,7 @@ static int membarrier_register_private_expedited(int flags) * Ensure all future scheduler executions will observe the * new thread flag state for this process. */ - synchronize_sched(); + synchronize_rcu(); } atomic_or(state, &mm->membarrier_state); @@ -298,7 +298,7 @@ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags) if (tick_nohz_full_enabled()) return -EINVAL; if (num_online_cpus() > 1) - synchronize_sched(); + synchronize_rcu(); return 0; case MEMBARRIER_CMD_GLOBAL_EXPEDITED: return membarrier_global_expedited(); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index f536f601bd46..5b4f73e4fd56 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -173,7 +173,7 @@ static void ftrace_sync(struct work_struct *work) { /* * This function is just a stub to implement a hard force - * of synchronize_sched(). This requires synchronizing + * of synchronize_rcu(). This requires synchronizing * tasks even in userspace and idle. * * Yes, function tracing is rude. 
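A minimal sketch (not part of this series) of the pattern the call_rcu_sched()/synchronize_sched() -> call_rcu()/synchronize_rcu() conversions in the surrounding hunks rely on: after the flavor consolidation, a preempt-disabled region is a full RCU read-side critical section, so the plain grace-period primitives cover it. The names demo_cfg, demo_cfg_ptr, demo_read_threshold() and demo_set_threshold() are hypothetical, not taken from these patches.

	/* Illustrative only; all "demo_*" identifiers are made up. */
	#include <linux/kernel.h>
	#include <linux/rcupdate.h>
	#include <linux/slab.h>
	#include <linux/spinlock.h>

	struct demo_cfg {
		int threshold;
		struct rcu_head rcu;
	};

	static struct demo_cfg __rcu *demo_cfg_ptr;
	static DEFINE_SPINLOCK(demo_cfg_lock);

	/* Reader: a preempt-disabled section now counts as an RCU reader. */
	static int demo_read_threshold(void)
	{
		struct demo_cfg *cfg;
		int val = -1;

		preempt_disable();
		cfg = rcu_dereference_sched(demo_cfg_ptr);
		if (cfg)
			val = cfg->threshold;
		preempt_enable();
		return val;
	}

	static void demo_cfg_free_rcu(struct rcu_head *rhp)
	{
		kfree(container_of(rhp, struct demo_cfg, rcu));
	}

	/* Updater: publish a replacement, free the old copy after a grace period. */
	static void demo_set_threshold(struct demo_cfg *newcfg)
	{
		struct demo_cfg *old;

		spin_lock(&demo_cfg_lock);
		old = rcu_dereference_protected(demo_cfg_ptr,
						lockdep_is_held(&demo_cfg_lock));
		rcu_assign_pointer(demo_cfg_ptr, newcfg);
		spin_unlock(&demo_cfg_lock);
		if (old)
			call_rcu(&old->rcu, demo_cfg_free_rcu); /* pre-consolidation this needed call_rcu_sched() */
	}
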
@@ -934,7 +934,7 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf, ftrace_profile_enabled = 0; /* * unregister_ftrace_profiler calls stop_machine - * so this acts like an synchronize_sched. + * so this acts like an synchronize_rcu. */ unregister_ftrace_profiler(); } @@ -1086,7 +1086,7 @@ struct ftrace_ops *ftrace_ops_trampoline(unsigned long addr) /* * Some of the ops may be dynamically allocated, - * they are freed after a synchronize_sched(). + * they are freed after a synchronize_rcu(). */ preempt_disable_notrace(); @@ -1286,7 +1286,7 @@ static void free_ftrace_hash_rcu(struct ftrace_hash *hash) { if (!hash || hash == EMPTY_HASH) return; - call_rcu_sched(&hash->rcu, __free_ftrace_hash_rcu); + call_rcu(&hash->rcu, __free_ftrace_hash_rcu); } void ftrace_free_filter(struct ftrace_ops *ops) @@ -1501,7 +1501,7 @@ static bool hash_contains_ip(unsigned long ip, * the ip is not in the ops->notrace_hash. * * This needs to be called with preemption disabled as - * the hashes are freed with call_rcu_sched(). + * the hashes are freed with call_rcu(). */ static int ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) @@ -4496,7 +4496,7 @@ unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr, if (ftrace_enabled && !ftrace_hash_empty(hash)) ftrace_run_modify_code(&probe->ops, FTRACE_UPDATE_CALLS, &old_hash_ops); - synchronize_sched(); + synchronize_rcu(); hlist_for_each_entry_safe(entry, tmp, &hhd, hlist) { hlist_del(&entry->hlist); @@ -5314,7 +5314,7 @@ ftrace_graph_release(struct inode *inode, struct file *file) mutex_unlock(&graph_lock); /* Wait till all users are no longer using the old hash */ - synchronize_sched(); + synchronize_rcu(); free_ftrace_hash(old_hash); } @@ -5707,7 +5707,7 @@ void ftrace_release_mod(struct module *mod) list_for_each_entry_safe(mod_map, n, &ftrace_mod_maps, list) { if (mod_map->mod == mod) { list_del_rcu(&mod_map->list); - call_rcu_sched(&mod_map->rcu, ftrace_free_mod_map); + call_rcu(&mod_map->rcu, ftrace_free_mod_map); break; } } @@ -5927,7 +5927,7 @@ ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, struct ftrace_mod_map *mod_map; const char *ret = NULL; - /* mod_map is freed via call_rcu_sched() */ + /* mod_map is freed via call_rcu() */ preempt_disable(); list_for_each_entry_rcu(mod_map, &ftrace_mod_maps, list) { ret = ftrace_func_address_lookup(mod_map, addr, size, off, sym); @@ -6262,7 +6262,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, /* * Some of the ops may be dynamically allocated, - * they must be freed after a synchronize_sched(). + * they must be freed after a synchronize_rcu(). 
*/ preempt_disable_notrace(); @@ -6433,7 +6433,7 @@ static void clear_ftrace_pids(struct trace_array *tr) rcu_assign_pointer(tr->function_pids, NULL); /* Wait till all users are no longer using pid filtering */ - synchronize_sched(); + synchronize_rcu(); trace_free_pid_list(pid_list); } @@ -6580,7 +6580,7 @@ ftrace_pid_write(struct file *filp, const char __user *ubuf, rcu_assign_pointer(tr->function_pids, pid_list); if (filtered_pids) { - synchronize_sched(); + synchronize_rcu(); trace_free_pid_list(filtered_pids); } else if (pid_list) { /* Register a probe to set whether to ignore the tracing of a task */ diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 65bd4616220d..4f3247a53259 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1834,7 +1834,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, * There could have been a race between checking * record_disable and incrementing it. */ - synchronize_sched(); + synchronize_rcu(); for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; rb_check_pages(cpu_buffer); @@ -3151,7 +3151,7 @@ static bool rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) * This prevents all writes to the buffer. Any attempt to write * to the buffer after this will fail and return NULL. * - * The caller should call synchronize_sched() after this. + * The caller should call synchronize_rcu() after this. */ void ring_buffer_record_disable(struct ring_buffer *buffer) { @@ -3253,7 +3253,7 @@ bool ring_buffer_record_is_set_on(struct ring_buffer *buffer) * This prevents all writes to the buffer. Any attempt to write * to the buffer after this will fail and return NULL. * - * The caller should call synchronize_sched() after this. + * The caller should call synchronize_rcu() after this. */ void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu) { @@ -4191,7 +4191,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_read_prepare); void ring_buffer_read_prepare_sync(void) { - synchronize_sched(); + synchronize_rcu(); } EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync); @@ -4363,7 +4363,7 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) atomic_inc(&cpu_buffer->record_disabled); /* Make sure all commits have finished */ - synchronize_sched(); + synchronize_rcu(); raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); @@ -4496,7 +4496,7 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, goto out; /* - * We can't do a synchronize_sched here because this + * We can't do a synchronize_rcu here because this * function can be called in atomic context. * Normally this will be called from the same CPU as cpu. * If not it's up to the caller to protect this. 
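A minimal sketch (not part of this series) of the sequence the updated ring-buffer comments call for: disable recording, wait one consolidated-RCU grace period so commits that raced with the disable drain, then reset and re-enable. tracing_reset() in the trace.c hunks just below has this exact shape; the wrapper name demo_reset_cpu_buffer() is hypothetical.

	/* Illustrative only; the wrapper name is made up. */
	#include <linux/rcupdate.h>
	#include <linux/ring_buffer.h>

	static void demo_reset_cpu_buffer(struct ring_buffer *buffer, int cpu)
	{
		ring_buffer_record_disable(buffer);

		/* Wait for commits that raced with the disable to finish. */
		synchronize_rcu();

		ring_buffer_reset_cpu(buffer, cpu);
		ring_buffer_record_enable(buffer);
	}
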
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ff1c4b20cd0a..51612b4a603f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1681,7 +1681,7 @@ void tracing_reset(struct trace_buffer *buf, int cpu) ring_buffer_record_disable(buffer); /* Make sure all commits have finished */ - synchronize_sched(); + synchronize_rcu(); ring_buffer_reset_cpu(buffer, cpu); ring_buffer_record_enable(buffer); @@ -1698,7 +1698,7 @@ void tracing_reset_online_cpus(struct trace_buffer *buf) ring_buffer_record_disable(buffer); /* Make sure all commits have finished */ - synchronize_sched(); + synchronize_rcu(); buf->time_start = buffer_ftrace_now(buf, buf->cpu); @@ -2250,7 +2250,7 @@ void trace_buffered_event_disable(void) preempt_enable(); /* Wait for all current users to finish */ - synchronize_sched(); + synchronize_rcu(); for_each_tracing_cpu(cpu) { free_page((unsigned long)per_cpu(trace_buffered_event, cpu)); @@ -5398,7 +5398,7 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf) if (tr->current_trace->reset) tr->current_trace->reset(tr); - /* Current trace needs to be nop_trace before synchronize_sched */ + /* Current trace needs to be nop_trace before synchronize_rcu */ tr->current_trace = &nop_trace; #ifdef CONFIG_TRACER_MAX_TRACE @@ -5412,7 +5412,7 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf) * The update_max_tr is called from interrupts disabled * so a synchronized_sched() is sufficient. */ - synchronize_sched(); + synchronize_rcu(); free_snapshot(tr); } #endif diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 84a65173b1e9..35f3aa55be85 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1614,7 +1614,7 @@ static int process_system_preds(struct trace_subsystem_dir *dir, /* * The calls can still be using the old filters. - * Do a synchronize_sched() and to ensure all calls are + * Do a synchronize_rcu() and to ensure all calls are * done with them before we free them. */ tracepoint_synchronize_unregister(); @@ -1845,7 +1845,7 @@ int apply_subsystem_event_filter(struct trace_subsystem_dir *dir, if (filter) { /* * No event actually uses the system filter - * we can free it without synchronize_sched(). + * we can free it without synchronize_rcu(). */ __free_filter(system->filter); system->filter = filter; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index fec67188c4d2..adc153ab51c0 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -333,7 +333,7 @@ disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) * event_call related objects, which will be accessed in * the kprobe_trace_func/kretprobe_trace_func. */ - synchronize_sched(); + synchronize_rcu(); kfree(link); /* Ignored if link == NULL */ } diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index a3be42304485..46f2ab1e08a9 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -92,7 +92,7 @@ static __init int release_early_probes(void) while (early_probes) { tmp = early_probes; early_probes = tmp->next; - call_rcu_sched(tmp, rcu_free_old_probes); + call_rcu(tmp, rcu_free_old_probes); } return 0; @@ -123,7 +123,7 @@ static inline void release_probes(struct tracepoint_func *old) * cover both cases. So let us chain the SRCU and sched RCU * callbacks to wait for both grace periods. 
*/ - call_rcu_sched(&tp_probes->rcu, rcu_free_old_probes); + call_rcu(&tp_probes->rcu, rcu_free_old_probes); } } diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 0280deac392e..392be4b252f6 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3396,7 +3396,7 @@ static void put_unbound_pool(struct worker_pool *pool) del_timer_sync(&pool->mayday_timer); /* sched-RCU protected to allow dereferences from get_work_pool() */ - call_rcu_sched(&pool->rcu, rcu_free_pool); + call_rcu(&pool->rcu, rcu_free_pool); } /** @@ -3503,14 +3503,14 @@ static void pwq_unbound_release_workfn(struct work_struct *work) put_unbound_pool(pool); mutex_unlock(&wq_pool_mutex); - call_rcu_sched(&pwq->rcu, rcu_free_pwq); + call_rcu(&pwq->rcu, rcu_free_pwq); /* * If we're the last pwq going away, @wq is already dead and no one * is gonna access it anymore. Schedule RCU free. */ if (is_last) - call_rcu_sched(&wq->rcu, rcu_free_wq); + call_rcu(&wq->rcu, rcu_free_wq); } /** @@ -4195,7 +4195,7 @@ void destroy_workqueue(struct workqueue_struct *wq) * The base ref is never dropped on per-cpu pwqs. Directly * schedule RCU free. */ - call_rcu_sched(&wq->rcu, rcu_free_wq); + call_rcu(&wq->rcu, rcu_free_wq); } else { /* * We're the sole accessor of @wq at this point. Directly diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index de10b8c0bff6..9877682e49c7 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -181,7 +181,7 @@ static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref, ref->confirm_switch = confirm_switch ?: percpu_ref_noop_confirm_switch; percpu_ref_get(ref); /* put after confirmation */ - call_rcu_sched(&ref->rcu, percpu_ref_switch_to_atomic_rcu); + call_rcu(&ref->rcu, percpu_ref_switch_to_atomic_rcu); } static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index c13625c1ad5e..7b86600a47c9 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1225,7 +1225,7 @@ static void collect_mm_slot(struct mm_slot *mm_slot) { struct mm_struct *mm = mm_slot->mm; - VM_BUG_ON(NR_CPUS != 1 && !spin_is_locked(&khugepaged_mm_lock)); + lockdep_assert_held(&khugepaged_mm_lock); if (khugepaged_test_exit(mm)) { /* free mm_slot */ @@ -1631,7 +1631,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, int progress = 0; VM_BUG_ON(!pages); - VM_BUG_ON(NR_CPUS != 1 && !spin_is_locked(&khugepaged_mm_lock)); + lockdep_assert_held(&khugepaged_mm_lock); if (khugepaged_scan.mm_slot) mm_slot = khugepaged_scan.mm_slot; diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index 2a9fbc4a37d5..f2f03c655807 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -199,7 +199,7 @@ void tlb_table_flush(struct mmu_gather *tlb) if (*batch) { tlb_table_invalidate(tlb); - call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); + call_rcu(&(*batch)->rcu, tlb_remove_table_rcu); *batch = NULL; } } diff --git a/mm/slab.c b/mm/slab.c index 2a5654bb3b3f..3abb9feb3818 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -962,10 +962,10 @@ static int setup_kmem_cache_node(struct kmem_cache *cachep, * To protect lockless access to n->shared during irq disabled context. * If n->shared isn't NULL in irq disabled context, accessing to it is * guaranteed to be valid until irq is re-enabled, because it will be - * freed after synchronize_sched(). + * freed after synchronize_rcu(). 
*/ if (old_shared && force_change) - synchronize_sched(); + synchronize_rcu(); fail: kfree(old_shared); diff --git a/mm/slab_common.c b/mm/slab_common.c index 7eb8dc136c1c..9c11e8a937d2 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -724,7 +724,7 @@ void slab_deactivate_memcg_cache_rcu_sched(struct kmem_cache *s, css_get(&s->memcg_params.memcg->css); s->memcg_params.deact_fn = deact_fn; - call_rcu_sched(&s->memcg_params.deact_rcu_head, kmemcg_deactivate_rcufn); + call_rcu(&s->memcg_params.deact_rcu_head, kmemcg_deactivate_rcufn); } void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg) @@ -839,11 +839,11 @@ static void flush_memcg_workqueue(struct kmem_cache *s) mutex_unlock(&slab_mutex); /* - * SLUB deactivates the kmem_caches through call_rcu_sched. Make + * SLUB deactivates the kmem_caches through call_rcu. Make * sure all registered rcu callbacks have been invoked. */ if (IS_ENABLED(CONFIG_SLUB)) - rcu_barrier_sched(); + rcu_barrier(); /* * SLAB and SLUB create memcg kmem_caches through workqueue and SLUB diff --git a/mm/swap.c b/mm/swap.c index aa483719922e..5d786019eab9 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -823,8 +823,7 @@ void lru_add_page_tail(struct page *page, struct page *page_tail, VM_BUG_ON_PAGE(!PageHead(page), page); VM_BUG_ON_PAGE(PageCompound(page_tail), page); VM_BUG_ON_PAGE(PageLRU(page_tail), page); - VM_BUG_ON(NR_CPUS != 1 && - !spin_is_locked(&lruvec_pgdat(lruvec)->lru_lock)); + lockdep_assert_held(&lruvec_pgdat(lruvec)->lru_lock); if (!list) SetPageLRU(page_tail); diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index a7ea2d431714..596ec6e7df11 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -728,7 +728,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) rcu_assign_pointer(*pp, p->next); hlist_del_init(&p->mglist); del_timer(&p->timer); - call_rcu_bh(&p->rcu, br_multicast_free_pg); + call_rcu(&p->rcu, br_multicast_free_pg); err = 0; if (!mp->ports && !mp->host_joined && diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 6bac0d6b7b94..0255223f2001 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -260,7 +260,7 @@ static void br_multicast_group_expired(struct timer_list *t) hlist_del_rcu(&mp->hlist[mdb->ver]); mdb->size--; - call_rcu_bh(&mp->rcu, br_multicast_free_group); + call_rcu(&mp->rcu, br_multicast_free_group); out: spin_unlock(&br->multicast_lock); @@ -291,7 +291,7 @@ static void br_multicast_del_pg(struct net_bridge *br, del_timer(&p->timer); br_mdb_notify(br->dev, p->port, &pg->addr, RTM_DELMDB, p->flags); - call_rcu_bh(&p->rcu, br_multicast_free_pg); + call_rcu(&p->rcu, br_multicast_free_pg); if (!mp->ports && !mp->host_joined && netif_running(br->dev)) @@ -358,7 +358,7 @@ static int br_mdb_rehash(struct net_bridge_mdb_htable __rcu **mdbp, int max, } br_mdb_rehash_seq++; - call_rcu_bh(&mdb->rcu, br_mdb_free); + call_rcu(&mdb->rcu, br_mdb_free); out: rcu_assign_pointer(*mdbp, mdb); @@ -1629,7 +1629,7 @@ br_multicast_leave_group(struct net_bridge *br, rcu_assign_pointer(*pp, p->next); hlist_del_init(&p->mglist); del_timer(&p->timer); - call_rcu_bh(&p->rcu, br_multicast_free_pg); + call_rcu(&p->rcu, br_multicast_free_pg); br_mdb_notify(br->dev, port, group, RTM_DELMDB, p->flags); @@ -2051,19 +2051,19 @@ void br_multicast_dev_del(struct net_bridge *br) hlist_for_each_entry_safe(mp, n, &mdb->mhash[i], hlist[ver]) { del_timer(&mp->timer); - call_rcu_bh(&mp->rcu, br_multicast_free_group); + call_rcu(&mp->rcu, br_multicast_free_group); } } if (mdb->old) { 
spin_unlock_bh(&br->multicast_lock); - rcu_barrier_bh(); + rcu_barrier(); spin_lock_bh(&br->multicast_lock); WARN_ON(mdb->old); } mdb->old = mdb; - call_rcu_bh(&mdb->rcu, br_mdb_free); + call_rcu(&mdb->rcu, br_mdb_free); out: spin_unlock_bh(&br->multicast_lock); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 5da9552b186b..677d3f332172 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -800,7 +800,7 @@ void __netpoll_cleanup(struct netpoll *np) ops->ndo_netpoll_cleanup(np->dev); RCU_INIT_POINTER(np->dev->npinfo, NULL); - call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info); + call_rcu(&npinfo->rcu, rcu_cleanup_netpoll_info); } else RCU_INIT_POINTER(np->dev->npinfo, NULL); } @@ -811,7 +811,7 @@ void __netpoll_free(struct netpoll *np) ASSERT_RTNL(); /* Wait for transmitting packets to finish before freeing. */ - synchronize_rcu_bh(); + synchronize_rcu(); __netpoll_cleanup(np); kfree(np); } diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 56a99d0c9aa0..c92d6ccce610 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -580,7 +580,7 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock) write_unlock_bh(&sk->sk_callback_lock); sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED); - call_rcu_sched(&psock->rcu, sk_psock_destroy); + call_rcu(&psock->rcu, sk_psock_destroy); } EXPORT_SYMBOL_GPL(sk_psock_drop); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 7d6ff983ba2c..dbd0f7bae00a 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -2405,7 +2405,7 @@ static void __exit decnet_exit(void) proto_unregister(&dn_proto); - rcu_barrier_bh(); /* Wait for completion of call_rcu_bh()'s */ + rcu_barrier(); /* Wait for completion of call_rcu()'s */ } module_exit(decnet_exit); #endif diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index ca3b0f46de53..016e628c6ac9 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -540,7 +540,7 @@ void qdisc_put_stab(struct qdisc_size_table *tab) if (--tab->refcnt == 0) { list_del(&tab->list); - call_rcu_bh(&tab->rcu, stab_kfree_rcu); + call_rcu(&tab->rcu, stab_kfree_rcu); } } EXPORT_SYMBOL(qdisc_put_stab); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index de1663f7d3ad..66ba2ce2320f 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1372,7 +1372,7 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, if (!tp_head) { RCU_INIT_POINTER(*miniqp->p_miniq, NULL); /* Wait for flying RCU callback before it is freed. */ - rcu_barrier_bh(); + rcu_barrier(); return; } @@ -1380,10 +1380,10 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, &miniqp->miniq1 : &miniqp->miniq2; /* We need to make sure that readers won't see the miniq - * we are about to modify. So wait until previous call_rcu_bh callback + * we are about to modify. So wait until previous call_rcu callback * is done. */ - rcu_barrier_bh(); + rcu_barrier(); miniq->filter_list = tp_head; rcu_assign_pointer(*miniqp->p_miniq, miniq); @@ -1392,7 +1392,7 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, * block potential new user of miniq_old until all readers * are not seeing it. 
*/ - call_rcu_bh(&miniq_old->rcu, mini_qdisc_rcu_func); + call_rcu(&miniq_old->rcu, mini_qdisc_rcu_func); } EXPORT_SYMBOL(mini_qdisc_pair_swap); diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index c883ec55654f..377f373db6c0 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -573,6 +573,27 @@ foreach my $entry (@mode_permission_funcs) { } $mode_perms_search = "(?:${mode_perms_search})"; +our %deprecated_apis = ( + "synchronize_rcu_bh" => "synchronize_rcu", + "synchronize_rcu_bh_expedited" => "synchronize_rcu_expedited", + "call_rcu_bh" => "call_rcu", + "rcu_barrier_bh" => "rcu_barrier", + "synchronize_sched" => "synchronize_rcu", + "synchronize_sched_expedited" => "synchronize_rcu_expedited", + "call_rcu_sched" => "call_rcu", + "rcu_barrier_sched" => "rcu_barrier", + "get_state_synchronize_sched" => "get_state_synchronize_rcu", + "cond_synchronize_sched" => "cond_synchronize_rcu", +); + +#Create a search pattern for all these strings to speed up a loop below +our $deprecated_apis_search = ""; +foreach my $entry (keys %deprecated_apis) { + $deprecated_apis_search .= '|' if ($deprecated_apis_search ne ""); + $deprecated_apis_search .= $entry; +} +$deprecated_apis_search = "(?:${deprecated_apis_search})"; + our $mode_perms_world_writable = qr{ S_IWUGO | S_IWOTH | @@ -6368,6 +6389,20 @@ sub process { "please use device_initcall() or more appropriate function instead of __initcall() (see include/linux/init.h)\n" . $herecurr); } +# check for spin_is_locked(), suggest lockdep instead + if ($line =~ /\bspin_is_locked\(/) { + WARN("USE_LOCKDEP", + "Where possible, use lockdep_assert_held instead of assertions based on spin_is_locked\n" . $herecurr); + } + +# check for deprecated apis + if ($line =~ /\b($deprecated_apis_search)\b\s*\(/) { + my $deprecated_api = $1; + my $new_api = $deprecated_apis{$deprecated_api}; + WARN("DEPRECATED_API", + "Deprecated use of '$deprecated_api', prefer '$new_api' instead\n" . $herecurr); + } + # check for various structs that are normally const (ops, kgdb, device_tree) # and avoid what seem like struct definitions 'struct foo {' if ($line !~ /\bconst\b/ && diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h index 6935ef94e77a..857d9e22826e 100644 --- a/tools/include/linux/kernel.h +++ b/tools/include/linux/kernel.h @@ -116,6 +116,6 @@ int scnprintf(char * buf, size_t size, const char * fmt, ...); #define round_down(x, y) ((x) & ~__round_mask(x, y)) #define current_gfp_context(k) 0 -#define synchronize_sched() +#define synchronize_rcu() #endif diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 5a7a62d76a50..19864f1cb27a 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -194,6 +194,14 @@ do shift done +if test -z "$TORTURE_INITRD" || tools/testing/selftests/rcutorture/bin/mkinitrd.sh +then + : +else + echo No initrd and unable to create one, aborting test >&2 + exit 1 +fi + CONFIGFRAG=${KVM}/configs/${TORTURE_SUITE}; export CONFIGFRAG if test -z "$configs" diff --git a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh new file mode 100755 index 000000000000..da298394daa2 --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh @@ -0,0 +1,136 @@ +#!/bin/bash +# +# Create an initrd directory if one does not already exist. 
+# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# Copyright (C) IBM Corporation, 2013 +# +# Author: Connor Shu <Connor.Shu@ibm.com> + +D=tools/testing/selftests/rcutorture + +# Prerequisite checks +[ -z "$D" ] && echo >&2 "No argument supplied" && exit 1 +if [ ! -d "$D" ]; then + echo >&2 "$D does not exist: Malformed kernel source tree?" + exit 1 +fi +if [ -s "$D/initrd/init" ]; then + echo "$D/initrd/init already exists, no need to create it" + exit 0 +fi + +T=${TMPDIR-/tmp}/mkinitrd.sh.$$ +trap 'rm -rf $T' 0 2 +mkdir $T + +cat > $T/init << '__EOF___' +#!/bin/sh +# Run in userspace a few milliseconds every second. This helps to +# exercise the NO_HZ_FULL portions of RCU. +while : +do + q= + for i in \ + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a \ + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a \ + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a \ + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a \ + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a \ + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + do + q="$q $i" + done + sleep 1 +done +__EOF___ + +# Try using dracut to create initrd +if command -v dracut >/dev/null 2>&1 +then + echo Creating $D/initrd using dracut. + # Filesystem creation + dracut --force --no-hostonly --no-hostonly-cmdline --module "base" $T/initramfs.img + cd $D + mkdir -p initrd + cd initrd + zcat $T/initramfs.img | cpio -id + cp $T/init init + chmod +x init + echo Done creating $D/initrd using dracut + exit 0 +fi + +# No dracut, so create a C-language initrd/init program and statically +# link it. This results in a very small initrd, but might be a bit less +# future-proof than dracut. +echo "Could not find dracut, attempting C initrd" +cd $D +mkdir -p initrd +cd initrd +cat > init.c << '___EOF___' +#ifndef NOLIBC +#include <unistd.h> +#include <sys/time.h> +#endif + +volatile unsigned long delaycount; + +int main(int argc, int argv[]) +{ + int i; + struct timeval tv; + struct timeval tvb; + + for (;;) { + sleep(1); + /* Need some userspace time. */ + if (gettimeofday(&tvb, NULL)) + continue; + do { + for (i = 0; i < 1000 * 100; i++) + delaycount = i * i; + if (gettimeofday(&tv, NULL)) + break; + tv.tv_sec -= tvb.tv_sec; + if (tv.tv_sec > 1) + break; + tv.tv_usec += tv.tv_sec * 1000 * 1000; + tv.tv_usec -= tvb.tv_usec; + } while (tv.tv_usec < 1000); + } + return 0; +} +___EOF___ + +# build using nolibc on supported archs (smaller executable) and fall +# back to regular glibc on other ones. 
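The architecture probe in the next few script lines is terse, so a rough equivalent may help: it amounts to preprocessing a snippet like the one below with the target compiler and checking whether the bare token "yes" survives, which indicates one of the architectures nolibc.h supports; otherwise the script falls back to a regular static glibc link. This snippet is only an illustration of what the echo | gcc -E | grep pipeline feeds to the preprocessor, not part of mkinitrd.sh.

    /* Illustrative equivalent of the shell probe below; not part of the script. */
    #if __x86_64__ || __i386__ || __i486__ || __i586__ || __i686__ \
        || __ARM_EABI__ || __aarch64__
    yes
    #endif
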
+if echo -e "#if __x86_64__||__i386__||__i486__||__i586__||__i686__" \ + "||__ARM_EABI__||__aarch64__\nyes\n#endif" \ + | ${CROSS_COMPILE}gcc -E -nostdlib -xc - \ + | grep -q '^yes'; then + # architecture supported by nolibc + ${CROSS_COMPILE}gcc -fno-asynchronous-unwind-tables -fno-ident \ + -nostdlib -include ../bin/nolibc.h -lgcc -s -static -Os \ + -o init init.c +else + ${CROSS_COMPILE}gcc -s -static -Os -o init init.c +fi + +rm init.c +echo "Done creating a statically linked C-language initrd" + +exit 0 diff --git a/tools/testing/selftests/rcutorture/bin/nolibc.h b/tools/testing/selftests/rcutorture/bin/nolibc.h new file mode 100644 index 000000000000..f98f5b92d3eb --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/nolibc.h @@ -0,0 +1,2197 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* nolibc.h + * Copyright (C) 2017-2018 Willy Tarreau <w@1wt.eu> + */ + +/* some archs (at least aarch64) don't expose the regular syscalls anymore by + * default, either because they have an "_at" replacement, or because there are + * more modern alternatives. For now we'd rather still use them. + */ +#define __ARCH_WANT_SYSCALL_NO_AT +#define __ARCH_WANT_SYSCALL_NO_FLAGS +#define __ARCH_WANT_SYSCALL_DEPRECATED + +#include <asm/unistd.h> +#include <asm/ioctls.h> +#include <asm/errno.h> +#include <linux/fs.h> +#include <linux/loop.h> + +#define NOLIBC + +/* Build a static executable this way : + * $ gcc -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \ + * -static -include nolibc.h -lgcc -o hello hello.c + * + * Useful calling convention table found here : + * http://man7.org/linux/man-pages/man2/syscall.2.html + * + * This doc is even better : + * https://w3challs.com/syscalls/ + */ + + +/* this way it will be removed if unused */ +static int errno; + +#ifndef NOLIBC_IGNORE_ERRNO +#define SET_ERRNO(v) do { errno = (v); } while (0) +#else +#define SET_ERRNO(v) do { } while (0) +#endif + +/* errno codes all ensure that they will not conflict with a valid pointer + * because they all correspond to the highest addressable memry page. + */ +#define MAX_ERRNO 4095 + +/* Declare a few quite common macros and types that usually are in stdlib.h, + * stdint.h, ctype.h, unistd.h and a few other common locations. 
+ */ + +#define NULL ((void *)0) + +/* stdint types */ +typedef unsigned char uint8_t; +typedef signed char int8_t; +typedef unsigned short uint16_t; +typedef signed short int16_t; +typedef unsigned int uint32_t; +typedef signed int int32_t; +typedef unsigned long long uint64_t; +typedef signed long long int64_t; +typedef unsigned long size_t; +typedef signed long ssize_t; +typedef unsigned long uintptr_t; +typedef signed long intptr_t; +typedef signed long ptrdiff_t; + +/* for stat() */ +typedef unsigned int dev_t; +typedef unsigned long ino_t; +typedef unsigned int mode_t; +typedef signed int pid_t; +typedef unsigned int uid_t; +typedef unsigned int gid_t; +typedef unsigned long nlink_t; +typedef signed long off_t; +typedef signed long blksize_t; +typedef signed long blkcnt_t; +typedef signed long time_t; + +/* for poll() */ +struct pollfd { + int fd; + short int events; + short int revents; +}; + +/* for select() */ +struct timeval { + long tv_sec; + long tv_usec; +}; + +/* for pselect() */ +struct timespec { + long tv_sec; + long tv_nsec; +}; + +/* for gettimeofday() */ +struct timezone { + int tz_minuteswest; + int tz_dsttime; +}; + +/* for getdents64() */ +struct linux_dirent64 { + uint64_t d_ino; + int64_t d_off; + unsigned short d_reclen; + unsigned char d_type; + char d_name[]; +}; + +/* commonly an fd_set represents 256 FDs */ +#define FD_SETSIZE 256 +typedef struct { uint32_t fd32[FD_SETSIZE/32]; } fd_set; + +/* needed by wait4() */ +struct rusage { + struct timeval ru_utime; + struct timeval ru_stime; + long ru_maxrss; + long ru_ixrss; + long ru_idrss; + long ru_isrss; + long ru_minflt; + long ru_majflt; + long ru_nswap; + long ru_inblock; + long ru_oublock; + long ru_msgsnd; + long ru_msgrcv; + long ru_nsignals; + long ru_nvcsw; + long ru_nivcsw; +}; + +/* stat flags (WARNING, octal here) */ +#define S_IFDIR 0040000 +#define S_IFCHR 0020000 +#define S_IFBLK 0060000 +#define S_IFREG 0100000 +#define S_IFIFO 0010000 +#define S_IFLNK 0120000 +#define S_IFSOCK 0140000 +#define S_IFMT 0170000 + +#define S_ISDIR(mode) (((mode) & S_IFDIR) == S_IFDIR) +#define S_ISCHR(mode) (((mode) & S_IFCHR) == S_IFCHR) +#define S_ISBLK(mode) (((mode) & S_IFBLK) == S_IFBLK) +#define S_ISREG(mode) (((mode) & S_IFREG) == S_IFREG) +#define S_ISFIFO(mode) (((mode) & S_IFIFO) == S_IFIFO) +#define S_ISLNK(mode) (((mode) & S_IFLNK) == S_IFLNK) +#define S_ISSOCK(mode) (((mode) & S_IFSOCK) == S_IFSOCK) + +#define DT_UNKNOWN 0 +#define DT_FIFO 1 +#define DT_CHR 2 +#define DT_DIR 4 +#define DT_BLK 6 +#define DT_REG 8 +#define DT_LNK 10 +#define DT_SOCK 12 + +/* all the *at functions */ +#ifndef AT_FDWCD +#define AT_FDCWD -100 +#endif + +/* lseek */ +#define SEEK_SET 0 +#define SEEK_CUR 1 +#define SEEK_END 2 + +/* reboot */ +#define LINUX_REBOOT_MAGIC1 0xfee1dead +#define LINUX_REBOOT_MAGIC2 0x28121969 +#define LINUX_REBOOT_CMD_HALT 0xcdef0123 +#define LINUX_REBOOT_CMD_POWER_OFF 0x4321fedc +#define LINUX_REBOOT_CMD_RESTART 0x01234567 +#define LINUX_REBOOT_CMD_SW_SUSPEND 0xd000fce2 + + +/* The format of the struct as returned by the libc to the application, which + * significantly differs from the format returned by the stat() syscall flavours. 
+ */ +struct stat { + dev_t st_dev; /* ID of device containing file */ + ino_t st_ino; /* inode number */ + mode_t st_mode; /* protection */ + nlink_t st_nlink; /* number of hard links */ + uid_t st_uid; /* user ID of owner */ + gid_t st_gid; /* group ID of owner */ + dev_t st_rdev; /* device ID (if special file) */ + off_t st_size; /* total size, in bytes */ + blksize_t st_blksize; /* blocksize for file system I/O */ + blkcnt_t st_blocks; /* number of 512B blocks allocated */ + time_t st_atime; /* time of last access */ + time_t st_mtime; /* time of last modification */ + time_t st_ctime; /* time of last status change */ +}; + +#define WEXITSTATUS(status) (((status) & 0xff00) >> 8) +#define WIFEXITED(status) (((status) & 0x7f) == 0) + + +/* Below comes the architecture-specific code. For each architecture, we have + * the syscall declarations and the _start code definition. This is the only + * global part. On all architectures the kernel puts everything in the stack + * before jumping to _start just above us, without any return address (_start + * is not a function but an entry pint). So at the stack pointer we find argc. + * Then argv[] begins, and ends at the first NULL. Then we have envp which + * starts and ends with a NULL as well. So envp=argv+argc+1. + */ + +#if defined(__x86_64__) +/* Syscalls for x86_64 : + * - registers are 64-bit + * - syscall number is passed in rax + * - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively + * - the system call is performed by calling the syscall instruction + * - syscall return comes in rax + * - rcx and r8..r11 may be clobbered, others are preserved. + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + * - the syscall number is always specified last in order to allow to force + * some registers before (gcc refuses a %-register at the last position). 
+ */ + +#define my_syscall0(num) \ +({ \ + long _ret; \ + register long _num asm("rax") = (num); \ + \ + asm volatile ( \ + "syscall\n" \ + : "=a" (_ret) \ + : "0"(_num) \ + : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + long _ret; \ + register long _num asm("rax") = (num); \ + register long _arg1 asm("rdi") = (long)(arg1); \ + \ + asm volatile ( \ + "syscall\n" \ + : "=a" (_ret) \ + : "r"(_arg1), \ + "0"(_num) \ + : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + long _ret; \ + register long _num asm("rax") = (num); \ + register long _arg1 asm("rdi") = (long)(arg1); \ + register long _arg2 asm("rsi") = (long)(arg2); \ + \ + asm volatile ( \ + "syscall\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), \ + "0"(_num) \ + : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + long _ret; \ + register long _num asm("rax") = (num); \ + register long _arg1 asm("rdi") = (long)(arg1); \ + register long _arg2 asm("rsi") = (long)(arg2); \ + register long _arg3 asm("rdx") = (long)(arg3); \ + \ + asm volatile ( \ + "syscall\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ + "0"(_num) \ + : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + long _ret; \ + register long _num asm("rax") = (num); \ + register long _arg1 asm("rdi") = (long)(arg1); \ + register long _arg2 asm("rsi") = (long)(arg2); \ + register long _arg3 asm("rdx") = (long)(arg3); \ + register long _arg4 asm("r10") = (long)(arg4); \ + \ + asm volatile ( \ + "syscall\n" \ + : "=a" (_ret), "=r"(_arg4) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "0"(_num) \ + : "rcx", "r8", "r9", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + long _ret; \ + register long _num asm("rax") = (num); \ + register long _arg1 asm("rdi") = (long)(arg1); \ + register long _arg2 asm("rsi") = (long)(arg2); \ + register long _arg3 asm("rdx") = (long)(arg3); \ + register long _arg4 asm("r10") = (long)(arg4); \ + register long _arg5 asm("r8") = (long)(arg5); \ + \ + asm volatile ( \ + "syscall\n" \ + : "=a" (_ret), "=r"(_arg4), "=r"(_arg5) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "0"(_num) \ + : "rcx", "r9", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + long _ret; \ + register long _num asm("rax") = (num); \ + register long _arg1 asm("rdi") = (long)(arg1); \ + register long _arg2 asm("rsi") = (long)(arg2); \ + register long _arg3 asm("rdx") = (long)(arg3); \ + register long _arg4 asm("r10") = (long)(arg4); \ + register long _arg5 asm("r8") = (long)(arg5); \ + register long _arg6 asm("r9") = (long)(arg6); \ + \ + asm volatile ( \ + "syscall\n" \ + : "=a" (_ret), "=r"(_arg4), "=r"(_arg5) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_arg6), "0"(_num) \ + : "rcx", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +/* startup code */ +asm(".section .text\n" + ".global _start\n" + "_start:\n" + "pop %rdi\n" // argc (first arg, %rdi) + "mov %rsp, %rsi\n" // argv[] (second arg, %rsi) + "lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx) + "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned when + "sub $8, %rsp\n" // entering the callee + 
"call main\n" // main() returns the status code, we'll exit with it. + "movzb %al, %rdi\n" // retrieve exit code from 8 lower bits + "mov $60, %rax\n" // NR_exit == 60 + "syscall\n" // really exit + "hlt\n" // ensure it does not return + ""); + +/* fcntl / open */ +#define O_RDONLY 0 +#define O_WRONLY 1 +#define O_RDWR 2 +#define O_CREAT 0x40 +#define O_EXCL 0x80 +#define O_NOCTTY 0x100 +#define O_TRUNC 0x200 +#define O_APPEND 0x400 +#define O_NONBLOCK 0x800 +#define O_DIRECTORY 0x10000 + +/* The struct returned by the stat() syscall, equivalent to stat64(). The + * syscall returns 116 bytes and stops in the middle of __unused. + */ +struct sys_stat_struct { + unsigned long st_dev; + unsigned long st_ino; + unsigned long st_nlink; + unsigned int st_mode; + unsigned int st_uid; + + unsigned int st_gid; + unsigned int __pad0; + unsigned long st_rdev; + long st_size; + long st_blksize; + + long st_blocks; + unsigned long st_atime; + unsigned long st_atime_nsec; + unsigned long st_mtime; + + unsigned long st_mtime_nsec; + unsigned long st_ctime; + unsigned long st_ctime_nsec; + long __unused[3]; +}; + +#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) +/* Syscalls for i386 : + * - mostly similar to x86_64 + * - registers are 32-bit + * - syscall number is passed in eax + * - arguments are in ebx, ecx, edx, esi, edi, ebp respectively + * - all registers are preserved (except eax of course) + * - the system call is performed by calling int $0x80 + * - syscall return comes in eax + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + * - the syscall number is always specified last in order to allow to force + * some registers before (gcc refuses a %-register at the last position). 
+ * + * Also, i386 supports the old_select syscall if newselect is not available + */ +#define __ARCH_WANT_SYS_OLD_SELECT + +#define my_syscall0(num) \ +({ \ + long _ret; \ + register long _num asm("eax") = (num); \ + \ + asm volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + long _ret; \ + register long _num asm("eax") = (num); \ + register long _arg1 asm("ebx") = (long)(arg1); \ + \ + asm volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + long _ret; \ + register long _num asm("eax") = (num); \ + register long _arg1 asm("ebx") = (long)(arg1); \ + register long _arg2 asm("ecx") = (long)(arg2); \ + \ + asm volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + long _ret; \ + register long _num asm("eax") = (num); \ + register long _arg1 asm("ebx") = (long)(arg1); \ + register long _arg2 asm("ecx") = (long)(arg2); \ + register long _arg3 asm("edx") = (long)(arg3); \ + \ + asm volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + long _ret; \ + register long _num asm("eax") = (num); \ + register long _arg1 asm("ebx") = (long)(arg1); \ + register long _arg2 asm("ecx") = (long)(arg2); \ + register long _arg3 asm("edx") = (long)(arg3); \ + register long _arg4 asm("esi") = (long)(arg4); \ + \ + asm volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + long _ret; \ + register long _num asm("eax") = (num); \ + register long _arg1 asm("ebx") = (long)(arg1); \ + register long _arg2 asm("ecx") = (long)(arg2); \ + register long _arg3 asm("edx") = (long)(arg3); \ + register long _arg4 asm("esi") = (long)(arg4); \ + register long _arg5 asm("edi") = (long)(arg5); \ + \ + asm volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +/* startup code */ +asm(".section .text\n" + ".global _start\n" + "_start:\n" + "pop %eax\n" // argc (first arg, %eax) + "mov %esp, %ebx\n" // argv[] (second arg, %ebx) + "lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx) + "and $-16, %esp\n" // x86 ABI : esp must be 16-byte aligned when + "push %ecx\n" // push all registers on the stack so that we + "push %ebx\n" // support both regparm and plain stack modes + "push %eax\n" + "call main\n" // main() returns the status code in %eax + "movzbl %al, %ebx\n" // retrieve exit code from lower 8 bits + "movl $1, %eax\n" // NR_exit == 1 + "int $0x80\n" // exit now + "hlt\n" // ensure it does not + ""); + +/* fcntl / open */ +#define O_RDONLY 0 +#define O_WRONLY 1 +#define O_RDWR 2 +#define O_CREAT 0x40 +#define O_EXCL 0x80 +#define O_NOCTTY 0x100 +#define O_TRUNC 0x200 +#define O_APPEND 0x400 +#define O_NONBLOCK 0x800 +#define O_DIRECTORY 0x10000 + +/* The struct returned by the stat() syscall, 32-bit only, the syscall returns + * exactly 56 bytes (stops before the unused array). 
+ */ +struct sys_stat_struct { + unsigned long st_dev; + unsigned long st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; + + unsigned long st_rdev; + unsigned long st_size; + unsigned long st_blksize; + unsigned long st_blocks; + + unsigned long st_atime; + unsigned long st_atime_nsec; + unsigned long st_mtime; + unsigned long st_mtime_nsec; + + unsigned long st_ctime; + unsigned long st_ctime_nsec; + unsigned long __unused[2]; +}; + +#elif defined(__ARM_EABI__) +/* Syscalls for ARM in ARM or Thumb modes : + * - registers are 32-bit + * - stack is 8-byte aligned + * ( http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka4127.html) + * - syscall number is passed in r7 + * - arguments are in r0, r1, r2, r3, r4, r5 + * - the system call is performed by calling svc #0 + * - syscall return comes in r0. + * - only lr is clobbered. + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + * - the syscall number is always specified last in order to allow to force + * some registers before (gcc refuses a %-register at the last position). + * + * Also, ARM supports the old_select syscall if newselect is not available + */ +#define __ARCH_WANT_SYS_OLD_SELECT + +#define my_syscall0(num) \ +({ \ + register long _num asm("r7") = (num); \ + register long _arg1 asm("r0"); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num asm("r7") = (num); \ + register long _arg1 asm("r0") = (long)(arg1); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), \ + "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num asm("r7") = (num); \ + register long _arg1 asm("r0") = (long)(arg1); \ + register long _arg2 asm("r1") = (long)(arg2); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), \ + "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num asm("r7") = (num); \ + register long _arg1 asm("r0") = (long)(arg1); \ + register long _arg2 asm("r1") = (long)(arg2); \ + register long _arg3 asm("r2") = (long)(arg3); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ + "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num asm("r7") = (num); \ + register long _arg1 asm("r0") = (long)(arg1); \ + register long _arg2 asm("r1") = (long)(arg2); \ + register long _arg3 asm("r2") = (long)(arg3); \ + register long _arg4 asm("r3") = (long)(arg4); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num asm("r7") = (num); \ + register long _arg1 asm("r0") = (long)(arg1); \ + register long _arg2 asm("r1") = (long)(arg2); \ + register long _arg3 asm("r2") = (long)(arg3); \ + register long _arg4 asm("r3") = (long)(arg4); \ + register long _arg5 asm("r4") = (long)(arg5); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r" (_arg1) \ + : "r"(_arg1), 
"r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +/* startup code */ +asm(".section .text\n" + ".global _start\n" + "_start:\n" +#if defined(__THUMBEB__) || defined(__THUMBEL__) + /* We enter here in 32-bit mode but if some previous functions were in + * 16-bit mode, the assembler cannot know, so we need to tell it we're in + * 32-bit now, then switch to 16-bit (is there a better way to do it than + * adding 1 by hand ?) and tell the asm we're now in 16-bit mode so that + * it generates correct instructions. Note that we do not support thumb1. + */ + ".code 32\n" + "add r0, pc, #1\n" + "bx r0\n" + ".code 16\n" +#endif + "pop {%r0}\n" // argc was in the stack + "mov %r1, %sp\n" // argv = sp + "add %r2, %r1, %r0, lsl #2\n" // envp = argv + 4*argc ... + "add %r2, %r2, $4\n" // ... + 4 + "and %r3, %r1, $-8\n" // AAPCS : sp must be 8-byte aligned in the + "mov %sp, %r3\n" // callee, an bl doesn't push (lr=pc) + "bl main\n" // main() returns the status code, we'll exit with it. + "and %r0, %r0, $0xff\n" // limit exit code to 8 bits + "movs r7, $1\n" // NR_exit == 1 + "svc $0x00\n" + ""); + +/* fcntl / open */ +#define O_RDONLY 0 +#define O_WRONLY 1 +#define O_RDWR 2 +#define O_CREAT 0x40 +#define O_EXCL 0x80 +#define O_NOCTTY 0x100 +#define O_TRUNC 0x200 +#define O_APPEND 0x400 +#define O_NONBLOCK 0x800 +#define O_DIRECTORY 0x4000 + +/* The struct returned by the stat() syscall, 32-bit only, the syscall returns + * exactly 56 bytes (stops before the unused array). In big endian, the format + * differs as devices are returned as short only. + */ +struct sys_stat_struct { +#if defined(__ARMEB__) + unsigned short st_dev; + unsigned short __pad1; +#else + unsigned long st_dev; +#endif + unsigned long st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; +#if defined(__ARMEB__) + unsigned short st_rdev; + unsigned short __pad2; +#else + unsigned long st_rdev; +#endif + unsigned long st_size; + unsigned long st_blksize; + unsigned long st_blocks; + unsigned long st_atime; + unsigned long st_atime_nsec; + unsigned long st_mtime; + unsigned long st_mtime_nsec; + unsigned long st_ctime; + unsigned long st_ctime_nsec; + unsigned long __unused[2]; +}; + +#elif defined(__aarch64__) +/* Syscalls for AARCH64 : + * - registers are 64-bit + * - stack is 16-byte aligned + * - syscall number is passed in x8 + * - arguments are in x0, x1, x2, x3, x4, x5 + * - the system call is performed by calling svc 0 + * - syscall return comes in x0. + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + * + * On aarch64, select() is not implemented so we have to use pselect6(). 
+ */ +#define __ARCH_WANT_SYS_PSELECT6 + +#define my_syscall0(num) \ +({ \ + register long _num asm("x8") = (num); \ + register long _arg1 asm("x0"); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num asm("x8") = (num); \ + register long _arg1 asm("x0") = (long)(arg1); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num asm("x8") = (num); \ + register long _arg1 asm("x0") = (long)(arg1); \ + register long _arg2 asm("x1") = (long)(arg2); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num asm("x8") = (num); \ + register long _arg1 asm("x0") = (long)(arg1); \ + register long _arg2 asm("x1") = (long)(arg2); \ + register long _arg3 asm("x2") = (long)(arg3); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num asm("x8") = (num); \ + register long _arg1 asm("x0") = (long)(arg1); \ + register long _arg2 asm("x1") = (long)(arg2); \ + register long _arg3 asm("x2") = (long)(arg3); \ + register long _arg4 asm("x3") = (long)(arg4); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num asm("x8") = (num); \ + register long _arg1 asm("x0") = (long)(arg1); \ + register long _arg2 asm("x1") = (long)(arg2); \ + register long _arg3 asm("x2") = (long)(arg3); \ + register long _arg4 asm("x3") = (long)(arg4); \ + register long _arg5 asm("x4") = (long)(arg5); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r" (_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num asm("x8") = (num); \ + register long _arg1 asm("x0") = (long)(arg1); \ + register long _arg2 asm("x1") = (long)(arg2); \ + register long _arg3 asm("x2") = (long)(arg3); \ + register long _arg4 asm("x3") = (long)(arg4); \ + register long _arg5 asm("x4") = (long)(arg5); \ + register long _arg6 asm("x5") = (long)(arg6); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r" (_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_arg6), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +/* startup code */ +asm(".section .text\n" + ".global _start\n" + "_start:\n" + "ldr x0, [sp]\n" // argc (x0) was in the stack + "add x1, sp, 8\n" // argv (x1) = sp + "lsl x2, x0, 3\n" // envp (x2) = 8*argc ... + "add x2, x2, 8\n" // + 8 (skip null) + "add x2, x2, x1\n" // + argv + "and sp, x1, -16\n" // sp must be 16-byte aligned in the callee + "bl main\n" // main() returns the status code, we'll exit with it. 
+ "and x0, x0, 0xff\n" // limit exit code to 8 bits + "mov x8, 93\n" // NR_exit == 93 + "svc #0\n" + ""); + +/* fcntl / open */ +#define O_RDONLY 0 +#define O_WRONLY 1 +#define O_RDWR 2 +#define O_CREAT 0x40 +#define O_EXCL 0x80 +#define O_NOCTTY 0x100 +#define O_TRUNC 0x200 +#define O_APPEND 0x400 +#define O_NONBLOCK 0x800 +#define O_DIRECTORY 0x4000 + +/* The struct returned by the newfstatat() syscall. Differs slightly from the + * x86_64's stat one by field ordering, so be careful. + */ +struct sys_stat_struct { + unsigned long st_dev; + unsigned long st_ino; + unsigned int st_mode; + unsigned int st_nlink; + unsigned int st_uid; + unsigned int st_gid; + + unsigned long st_rdev; + unsigned long __pad1; + long st_size; + int st_blksize; + int __pad2; + + long st_blocks; + long st_atime; + unsigned long st_atime_nsec; + long st_mtime; + + unsigned long st_mtime_nsec; + long st_ctime; + unsigned long st_ctime_nsec; + unsigned int __unused[2]; +}; + +#elif defined(__mips__) && defined(_ABIO32) +/* Syscalls for MIPS ABI O32 : + * - WARNING! there's always a delayed slot! + * - WARNING again, the syntax is different, registers take a '$' and numbers + * do not. + * - registers are 32-bit + * - stack is 8-byte aligned + * - syscall number is passed in v0 (starts at 0xfa0). + * - arguments are in a0, a1, a2, a3, then the stack. The caller needs to + * leave some room in the stack for the callee to save a0..a3 if needed. + * - Many registers are clobbered, in fact only a0..a2 and s0..s8 are + * preserved. See: https://www.linux-mips.org/wiki/Syscall as well as + * scall32-o32.S in the kernel sources. + * - the system call is performed by calling "syscall" + * - syscall return comes in v0, and register a3 needs to be checked to know + * if an error occured, in which case errno is in v0. + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + */ + +#define my_syscall0(num) \ +({ \ + register long _num asm("v0") = (num); \ + register long _arg4 asm("a3"); \ + \ + asm volatile ( \ + "addiu $sp, $sp, -32\n" \ + "syscall\n" \ + "addiu $sp, $sp, 32\n" \ + : "=r"(_num), "=r"(_arg4) \ + : "r"(_num) \ + : "memory", "cc", "at", "v1", "hi", "lo", \ + \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num asm("v0") = (num); \ + register long _arg1 asm("a0") = (long)(arg1); \ + register long _arg4 asm("a3"); \ + \ + asm volatile ( \ + "addiu $sp, $sp, -32\n" \ + "syscall\n" \ + "addiu $sp, $sp, 32\n" \ + : "=r"(_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1) \ + : "memory", "cc", "at", "v1", "hi", "lo", \ + \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num asm("v0") = (num); \ + register long _arg1 asm("a0") = (long)(arg1); \ + register long _arg2 asm("a1") = (long)(arg2); \ + register long _arg4 asm("a3"); \ + \ + asm volatile ( \ + "addiu $sp, $sp, -32\n" \ + "syscall\n" \ + "addiu $sp, $sp, 32\n" \ + : "=r"(_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2) \ + : "memory", "cc", "at", "v1", "hi", "lo", \ + \ + ); \ + _arg4 ? 
-_num : _num; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num asm("v0") = (num); \ + register long _arg1 asm("a0") = (long)(arg1); \ + register long _arg2 asm("a1") = (long)(arg2); \ + register long _arg3 asm("a2") = (long)(arg3); \ + register long _arg4 asm("a3"); \ + \ + asm volatile ( \ + "addiu $sp, $sp, -32\n" \ + "syscall\n" \ + "addiu $sp, $sp, 32\n" \ + : "=r"(_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2), "r"(_arg3) \ + : "memory", "cc", "at", "v1", "hi", "lo", \ + \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num asm("v0") = (num); \ + register long _arg1 asm("a0") = (long)(arg1); \ + register long _arg2 asm("a1") = (long)(arg2); \ + register long _arg3 asm("a2") = (long)(arg3); \ + register long _arg4 asm("a3") = (long)(arg4); \ + \ + asm volatile ( \ + "addiu $sp, $sp, -32\n" \ + "syscall\n" \ + "addiu $sp, $sp, 32\n" \ + : "=r" (_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \ + : "memory", "cc", "at", "v1", "hi", "lo", \ + \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num asm("v0") = (num); \ + register long _arg1 asm("a0") = (long)(arg1); \ + register long _arg2 asm("a1") = (long)(arg2); \ + register long _arg3 asm("a2") = (long)(arg3); \ + register long _arg4 asm("a3") = (long)(arg4); \ + register long _arg5 = (long)(arg5); \ + \ + asm volatile ( \ + "addiu $sp, $sp, -32\n" \ + "sw %7, 16($sp)\n" \ + "syscall\n " \ + "addiu $sp, $sp, 32\n" \ + : "=r" (_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \ + : "memory", "cc", "at", "v1", "hi", "lo", \ + \ + ); \ + _arg4 ? -_num : _num; \ +}) + +/* startup code, note that it's called __start on MIPS */ +asm(".section .text\n" + ".set nomips16\n" + ".global __start\n" + ".set noreorder\n" + ".option pic0\n" + ".ent __start\n" + "__start:\n" + "lw $a0,($sp)\n" // argc was in the stack + "addiu $a1, $sp, 4\n" // argv = sp + 4 + "sll $a2, $a0, 2\n" // a2 = argc * 4 + "add $a2, $a2, $a1\n" // envp = argv + 4*argc ... + "addiu $a2, $a2, 4\n" // ... + 4 + "li $t0, -8\n" + "and $sp, $sp, $t0\n" // sp must be 8-byte aligned + "addiu $sp,$sp,-16\n" // the callee expects to save a0..a3 there! + "jal main\n" // main() returns the status code, we'll exit with it. + "nop\n" // delayed slot + "and $a0, $v0, 0xff\n" // limit exit code to 8 bits + "li $v0, 4001\n" // NR_exit == 4001 + "syscall\n" + ".end __start\n" + ""); + +/* fcntl / open */ +#define O_RDONLY 0 +#define O_WRONLY 1 +#define O_RDWR 2 +#define O_APPEND 0x0008 +#define O_NONBLOCK 0x0080 +#define O_CREAT 0x0100 +#define O_TRUNC 0x0200 +#define O_EXCL 0x0400 +#define O_NOCTTY 0x0800 +#define O_DIRECTORY 0x10000 + +/* The struct returned by the stat() syscall. 88 bytes are returned by the + * syscall. + */ +struct sys_stat_struct { + unsigned int st_dev; + long st_pad1[3]; + unsigned long st_ino; + unsigned int st_mode; + unsigned int st_nlink; + unsigned int st_uid; + unsigned int st_gid; + unsigned int st_rdev; + long st_pad2[2]; + long st_size; + long st_pad3; + long st_atime; + long st_atime_nsec; + long st_mtime; + long st_mtime_nsec; + long st_ctime; + long st_ctime_nsec; + long st_blksize; + long st_blocks; + long st_pad4[14]; +}; + +#endif + + +/* Below are the C functions used to declare the raw syscalls. They try to be + * architecture-agnostic, and return either a success or -errno. 
Declaring them + * static will lead to them being inlined in most cases, but it's still possible + * to reference them by a pointer if needed. + */ +static __attribute__((unused)) +void *sys_brk(void *addr) +{ + return (void *)my_syscall1(__NR_brk, addr); +} + +static __attribute__((noreturn,unused)) +void sys_exit(int status) +{ + my_syscall1(__NR_exit, status & 255); + while(1); // shut the "noreturn" warnings. +} + +static __attribute__((unused)) +int sys_chdir(const char *path) +{ + return my_syscall1(__NR_chdir, path); +} + +static __attribute__((unused)) +int sys_chmod(const char *path, mode_t mode) +{ +#ifdef __NR_fchmodat + return my_syscall4(__NR_fchmodat, AT_FDCWD, path, mode, 0); +#else + return my_syscall2(__NR_chmod, path, mode); +#endif +} + +static __attribute__((unused)) +int sys_chown(const char *path, uid_t owner, gid_t group) +{ +#ifdef __NR_fchownat + return my_syscall5(__NR_fchownat, AT_FDCWD, path, owner, group, 0); +#else + return my_syscall3(__NR_chown, path, owner, group); +#endif +} + +static __attribute__((unused)) +int sys_chroot(const char *path) +{ + return my_syscall1(__NR_chroot, path); +} + +static __attribute__((unused)) +int sys_close(int fd) +{ + return my_syscall1(__NR_close, fd); +} + +static __attribute__((unused)) +int sys_dup(int fd) +{ + return my_syscall1(__NR_dup, fd); +} + +static __attribute__((unused)) +int sys_dup2(int old, int new) +{ + return my_syscall2(__NR_dup2, old, new); +} + +static __attribute__((unused)) +int sys_execve(const char *filename, char *const argv[], char *const envp[]) +{ + return my_syscall3(__NR_execve, filename, argv, envp); +} + +static __attribute__((unused)) +pid_t sys_fork(void) +{ + return my_syscall0(__NR_fork); +} + +static __attribute__((unused)) +int sys_fsync(int fd) +{ + return my_syscall1(__NR_fsync, fd); +} + +static __attribute__((unused)) +int sys_getdents64(int fd, struct linux_dirent64 *dirp, int count) +{ + return my_syscall3(__NR_getdents64, fd, dirp, count); +} + +static __attribute__((unused)) +pid_t sys_getpgrp(void) +{ + return my_syscall0(__NR_getpgrp); +} + +static __attribute__((unused)) +pid_t sys_getpid(void) +{ + return my_syscall0(__NR_getpid); +} + +static __attribute__((unused)) +int sys_gettimeofday(struct timeval *tv, struct timezone *tz) +{ + return my_syscall2(__NR_gettimeofday, tv, tz); +} + +static __attribute__((unused)) +int sys_ioctl(int fd, unsigned long req, void *value) +{ + return my_syscall3(__NR_ioctl, fd, req, value); +} + +static __attribute__((unused)) +int sys_kill(pid_t pid, int signal) +{ + return my_syscall2(__NR_kill, pid, signal); +} + +static __attribute__((unused)) +int sys_link(const char *old, const char *new) +{ +#ifdef __NR_linkat + return my_syscall5(__NR_linkat, AT_FDCWD, old, AT_FDCWD, new, 0); +#else + return my_syscall2(__NR_link, old, new); +#endif +} + +static __attribute__((unused)) +off_t sys_lseek(int fd, off_t offset, int whence) +{ + return my_syscall3(__NR_lseek, fd, offset, whence); +} + +static __attribute__((unused)) +int sys_mkdir(const char *path, mode_t mode) +{ +#ifdef __NR_mkdirat + return my_syscall3(__NR_mkdirat, AT_FDCWD, path, mode); +#else + return my_syscall2(__NR_mkdir, path, mode); +#endif +} + +static __attribute__((unused)) +long sys_mknod(const char *path, mode_t mode, dev_t dev) +{ +#ifdef __NR_mknodat + return my_syscall4(__NR_mknodat, AT_FDCWD, path, mode, dev); +#else + return my_syscall3(__NR_mknod, path, mode, dev); +#endif +} + +static __attribute__((unused)) +int sys_mount(const char *src, const char *tgt, const char 
*fst, + unsigned long flags, const void *data) +{ + return my_syscall5(__NR_mount, src, tgt, fst, flags, data); +} + +static __attribute__((unused)) +int sys_open(const char *path, int flags, mode_t mode) +{ +#ifdef __NR_openat + return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode); +#else + return my_syscall3(__NR_open, path, flags, mode); +#endif +} + +static __attribute__((unused)) +int sys_pivot_root(const char *new, const char *old) +{ + return my_syscall2(__NR_pivot_root, new, old); +} + +static __attribute__((unused)) +int sys_poll(struct pollfd *fds, int nfds, int timeout) +{ + return my_syscall3(__NR_poll, fds, nfds, timeout); +} + +static __attribute__((unused)) +ssize_t sys_read(int fd, void *buf, size_t count) +{ + return my_syscall3(__NR_read, fd, buf, count); +} + +static __attribute__((unused)) +ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg) +{ + return my_syscall4(__NR_reboot, magic1, magic2, cmd, arg); +} + +static __attribute__((unused)) +int sys_sched_yield(void) +{ + return my_syscall0(__NR_sched_yield); +} + +static __attribute__((unused)) +int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout) +{ +#if defined(__ARCH_WANT_SYS_OLD_SELECT) && !defined(__NR__newselect) + struct sel_arg_struct { + unsigned long n; + fd_set *r, *w, *e; + struct timeval *t; + } arg = { .n = nfds, .r = rfds, .w = wfds, .e = efds, .t = timeout }; + return my_syscall1(__NR_select, &arg); +#elif defined(__ARCH_WANT_SYS_PSELECT6) && defined(__NR_pselect6) + struct timespec t; + + if (timeout) { + t.tv_sec = timeout->tv_sec; + t.tv_nsec = timeout->tv_usec * 1000; + } + return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL); +#else +#ifndef __NR__newselect +#define __NR__newselect __NR_select +#endif + return my_syscall5(__NR__newselect, nfds, rfds, wfds, efds, timeout); +#endif +} + +static __attribute__((unused)) +int sys_setpgid(pid_t pid, pid_t pgid) +{ + return my_syscall2(__NR_setpgid, pid, pgid); +} + +static __attribute__((unused)) +pid_t sys_setsid(void) +{ + return my_syscall0(__NR_setsid); +} + +static __attribute__((unused)) +int sys_stat(const char *path, struct stat *buf) +{ + struct sys_stat_struct stat; + long ret; + +#ifdef __NR_newfstatat + /* only solution for arm64 */ + ret = my_syscall4(__NR_newfstatat, AT_FDCWD, path, &stat, 0); +#else + ret = my_syscall2(__NR_stat, path, &stat); +#endif + buf->st_dev = stat.st_dev; + buf->st_ino = stat.st_ino; + buf->st_mode = stat.st_mode; + buf->st_nlink = stat.st_nlink; + buf->st_uid = stat.st_uid; + buf->st_gid = stat.st_gid; + buf->st_rdev = stat.st_rdev; + buf->st_size = stat.st_size; + buf->st_blksize = stat.st_blksize; + buf->st_blocks = stat.st_blocks; + buf->st_atime = stat.st_atime; + buf->st_mtime = stat.st_mtime; + buf->st_ctime = stat.st_ctime; + return ret; +} + + +static __attribute__((unused)) +int sys_symlink(const char *old, const char *new) +{ +#ifdef __NR_symlinkat + return my_syscall3(__NR_symlinkat, old, AT_FDCWD, new); +#else + return my_syscall2(__NR_symlink, old, new); +#endif +} + +static __attribute__((unused)) +mode_t sys_umask(mode_t mode) +{ + return my_syscall1(__NR_umask, mode); +} + +static __attribute__((unused)) +int sys_umount2(const char *path, int flags) +{ + return my_syscall2(__NR_umount2, path, flags); +} + +static __attribute__((unused)) +int sys_unlink(const char *path) +{ +#ifdef __NR_unlinkat + return my_syscall3(__NR_unlinkat, AT_FDCWD, path, 0); +#else + return my_syscall1(__NR_unlink, path); +#endif +} + 
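The split visible here is deliberate: the raw sys_*() helpers above return a negative errno value straight from the kernel, while the libc-compatible wrappers defined further down translate that into the usual -1 return with errno set. Below is a minimal sketch of how a program built against this header might use the two layers; the file name probe.c and the target path /dev/null are illustrative assumptions, not taken from the patch.

    /* probe.c - illustrative only, not part of nolibc.h.  Build roughly as
     * the header comment suggests:
     *   gcc -nostdlib -static -include nolibc.h -lgcc -Os -o probe probe.c
     */
    int main(void)
    {
            int fd;

            fd = sys_open("/dev/null", O_RDWR, 0);
            if (fd < 0)             /* raw layer: fd holds -errno */
                    return 1;
            sys_close(fd);

            fd = open("/dev/null", O_RDWR, 0);
            if (fd == -1)           /* libc layer: the code is in errno */
                    return 1;
            close(fd);
            return 0;
    }
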
+static __attribute__((unused)) +pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage) +{ + return my_syscall4(__NR_wait4, pid, status, options, rusage); +} + +static __attribute__((unused)) +pid_t sys_waitpid(pid_t pid, int *status, int options) +{ + return sys_wait4(pid, status, options, 0); +} + +static __attribute__((unused)) +pid_t sys_wait(int *status) +{ + return sys_waitpid(-1, status, 0); +} + +static __attribute__((unused)) +ssize_t sys_write(int fd, const void *buf, size_t count) +{ + return my_syscall3(__NR_write, fd, buf, count); +} + + +/* Below are the libc-compatible syscalls which return x or -1 and set errno. + * They rely on the functions above. Similarly they're marked static so that it + * is possible to assign pointers to them if needed. + */ + +static __attribute__((unused)) +int brk(void *addr) +{ + void *ret = sys_brk(addr); + + if (!ret) { + SET_ERRNO(ENOMEM); + return -1; + } + return 0; +} + +static __attribute__((noreturn,unused)) +void exit(int status) +{ + sys_exit(status); +} + +static __attribute__((unused)) +int chdir(const char *path) +{ + int ret = sys_chdir(path); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int chmod(const char *path, mode_t mode) +{ + int ret = sys_chmod(path, mode); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int chown(const char *path, uid_t owner, gid_t group) +{ + int ret = sys_chown(path, owner, group); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int chroot(const char *path) +{ + int ret = sys_chroot(path); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int close(int fd) +{ + int ret = sys_close(fd); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int dup2(int old, int new) +{ + int ret = sys_dup2(old, new); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int execve(const char *filename, char *const argv[], char *const envp[]) +{ + int ret = sys_execve(filename, argv, envp); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +pid_t fork(void) +{ + pid_t ret = sys_fork(); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int fsync(int fd) +{ + int ret = sys_fsync(fd); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int getdents64(int fd, struct linux_dirent64 *dirp, int count) +{ + int ret = sys_getdents64(fd, dirp, count); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +pid_t getpgrp(void) +{ + pid_t ret = sys_getpgrp(); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +pid_t getpid(void) +{ + pid_t ret = sys_getpid(); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int gettimeofday(struct timeval *tv, struct timezone *tz) +{ + int ret = sys_gettimeofday(tv, tz); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int ioctl(int fd, unsigned long req, void *value) +{ + int ret = sys_ioctl(fd, req, value); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return 
ret; +} + +static __attribute__((unused)) +int kill(pid_t pid, int signal) +{ + int ret = sys_kill(pid, signal); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int link(const char *old, const char *new) +{ + int ret = sys_link(old, new); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +off_t lseek(int fd, off_t offset, int whence) +{ + off_t ret = sys_lseek(fd, offset, whence); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int mkdir(const char *path, mode_t mode) +{ + int ret = sys_mkdir(path, mode); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int mknod(const char *path, mode_t mode, dev_t dev) +{ + int ret = sys_mknod(path, mode, dev); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int mount(const char *src, const char *tgt, + const char *fst, unsigned long flags, + const void *data) +{ + int ret = sys_mount(src, tgt, fst, flags, data); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int open(const char *path, int flags, mode_t mode) +{ + int ret = sys_open(path, flags, mode); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int pivot_root(const char *new, const char *old) +{ + int ret = sys_pivot_root(new, old); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int poll(struct pollfd *fds, int nfds, int timeout) +{ + int ret = sys_poll(fds, nfds, timeout); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +ssize_t read(int fd, void *buf, size_t count) +{ + ssize_t ret = sys_read(fd, buf, count); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int reboot(int cmd) +{ + int ret = sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +void *sbrk(intptr_t inc) +{ + void *ret; + + /* first call to find current end */ + if ((ret = sys_brk(0)) && (sys_brk(ret + inc) == ret + inc)) + return ret + inc; + + SET_ERRNO(ENOMEM); + return (void *)-1; +} + +static __attribute__((unused)) +int sched_yield(void) +{ + int ret = sys_sched_yield(); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout) +{ + int ret = sys_select(nfds, rfds, wfds, efds, timeout); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int setpgid(pid_t pid, pid_t pgid) +{ + int ret = sys_setpgid(pid, pgid); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +pid_t setsid(void) +{ + pid_t ret = sys_setsid(); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +unsigned int sleep(unsigned int seconds) +{ + struct timeval my_timeval = { seconds, 0 }; + + if (sys_select(0, 0, 0, 0, &my_timeval) < 0) + return my_timeval.tv_sec + !!my_timeval.tv_usec; + else + return 0; +} + +static __attribute__((unused)) +int stat(const char *path, struct stat *buf) +{ + int 
ret = sys_stat(path, buf); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int symlink(const char *old, const char *new) +{ + int ret = sys_symlink(old, new); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int tcsetpgrp(int fd, pid_t pid) +{ + return ioctl(fd, TIOCSPGRP, &pid); +} + +static __attribute__((unused)) +mode_t umask(mode_t mode) +{ + return sys_umask(mode); +} + +static __attribute__((unused)) +int umount2(const char *path, int flags) +{ + int ret = sys_umount2(path, flags); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int unlink(const char *path) +{ + int ret = sys_unlink(path); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage) +{ + pid_t ret = sys_wait4(pid, status, options, rusage); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +pid_t waitpid(pid_t pid, int *status, int options) +{ + pid_t ret = sys_waitpid(pid, status, options); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +pid_t wait(int *status) +{ + pid_t ret = sys_wait(status); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +ssize_t write(int fd, const void *buf, size_t count) +{ + ssize_t ret = sys_write(fd, buf, count); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +/* some size-optimized reimplementations of a few common str* and mem* + * functions. They're marked static, except memcpy() and raise() which are used + * by libgcc on ARM, so they are marked weak instead in order not to cause an + * error when building a program made of multiple files (not recommended). + */ + +static __attribute__((unused)) +void *memmove(void *dst, const void *src, size_t len) +{ + ssize_t pos = (dst <= src) ? -1 : (long)len; + void *ret = dst; + + while (len--) { + pos += (dst <= src) ? 1 : -1; + ((char *)dst)[pos] = ((char *)src)[pos]; + } + return ret; +} + +static __attribute__((unused)) +void *memset(void *dst, int b, size_t len) +{ + char *p = dst; + + while (len--) + *(p++) = b; + return dst; +} + +static __attribute__((unused)) +int memcmp(const void *s1, const void *s2, size_t n) +{ + size_t ofs = 0; + char c1 = 0; + + while (ofs < n && !(c1 = ((char *)s1)[ofs] - ((char *)s2)[ofs])) { + ofs++; + } + return c1; +} + +static __attribute__((unused)) +char *strcpy(char *dst, const char *src) +{ + char *ret = dst; + + while ((*dst++ = *src++)); + return ret; +} + +static __attribute__((unused)) +char *strchr(const char *s, int c) +{ + while (*s) { + if (*s == (char)c) + return (char *)s; + s++; + } + return NULL; +} + +static __attribute__((unused)) +char *strrchr(const char *s, int c) +{ + const char *ret = NULL; + + while (*s) { + if (*s == (char)c) + ret = s; + s++; + } + return (char *)ret; +} + +static __attribute__((unused)) +size_t nolibc_strlen(const char *str) +{ + size_t len; + + for (len = 0; str[len]; len++); + return len; +} + +#define strlen(str) ({ \ + __builtin_constant_p((str)) ? 
\ + __builtin_strlen((str)) : \ + nolibc_strlen((str)); \ +}) + +static __attribute__((unused)) +int isdigit(int c) +{ + return (unsigned int)(c - '0') <= 9; +} + +static __attribute__((unused)) +long atol(const char *s) +{ + unsigned long ret = 0; + unsigned long d; + int neg = 0; + + if (*s == '-') { + neg = 1; + s++; + } + + while (1) { + d = (*s++) - '0'; + if (d > 9) + break; + ret *= 10; + ret += d; + } + + return neg ? -ret : ret; +} + +static __attribute__((unused)) +int atoi(const char *s) +{ + return atol(s); +} + +static __attribute__((unused)) +const char *ltoa(long in) +{ + /* large enough for -9223372036854775808 */ + static char buffer[21]; + char *pos = buffer + sizeof(buffer) - 1; + int neg = in < 0; + unsigned long n = neg ? -in : in; + + *pos-- = '\0'; + do { + *pos-- = '0' + n % 10; + n /= 10; + if (pos < buffer) + return pos + 1; + } while (n); + + if (neg) + *pos-- = '-'; + return pos + 1; +} + +__attribute__((weak,unused)) +void *memcpy(void *dst, const void *src, size_t len) +{ + return memmove(dst, src, len); +} + +/* needed by libgcc for divide by zero */ +__attribute__((weak,unused)) +int raise(int signal) +{ + return kill(getpid(), signal); +} + +/* Here come a few helper functions */ + +static __attribute__((unused)) +void FD_ZERO(fd_set *set) +{ + memset(set, 0, sizeof(*set)); +} + +static __attribute__((unused)) +void FD_SET(int fd, fd_set *set) +{ + if (fd < 0 || fd >= FD_SETSIZE) + return; + set->fd32[fd / 32] |= 1 << (fd & 31); +} + +/* WARNING, it only deals with the 4096 first majors and 256 first minors */ +static __attribute__((unused)) +dev_t makedev(unsigned int major, unsigned int minor) +{ + return ((major & 0xfff) << 8) | (minor & 0xff); +} diff --git a/tools/testing/selftests/rcutorture/doc/initrd.txt b/tools/testing/selftests/rcutorture/doc/initrd.txt index 833f826d6ec2..933b4fd12327 100644 --- a/tools/testing/selftests/rcutorture/doc/initrd.txt +++ b/tools/testing/selftests/rcutorture/doc/initrd.txt @@ -1,9 +1,12 @@ -This document describes one way to create the initrd directory hierarchy -in order to allow an initrd to be built into your kernel. The trick -here is to steal the initrd file used on your Linux laptop, Ubuntu in -this case. There are probably much better ways of doing this. +The rcutorture scripting tools automatically create the needed initrd +directory using dracut. Failing that, this tool will create an initrd +containing a single statically linked binary named "init" that loops +over a very long sleep() call. In both cases, this creation is done +by tools/testing/selftests/rcutorture/bin/mkinitrd.sh. -That said, here are the commands: +However, if you are attempting to run rcutorture on a system that does +not have dracut installed, and if you don't like the notion of static +linking, you might wish to press an existing initrd into service: ------------------------------------------------------------------------ cd tools/testing/selftests/rcutorture @@ -11,22 +14,7 @@ zcat /initrd.img > /tmp/initrd.img.zcat mkdir initrd cd initrd cpio -id < /tmp/initrd.img.zcat ------------------------------------------------------------------------- - -Another way to create an initramfs image is using "dracut"[1], which is -available on many distros, however the initramfs dracut generates is a cpio -archive with another cpio archive in it, so an extra step is needed to create -the initrd directory hierarchy. 
- -Here are the commands to create a initrd directory for rcutorture using -dracut: - ------------------------------------------------------------------------- -dracut --no-hostonly --no-hostonly-cmdline --module "base bash shutdown" /tmp/initramfs.img -cd tools/testing/selftests/rcutorture -mkdir initrd -cd initrd -/usr/lib/dracut/skipcpio /tmp/initramfs.img | zcat | cpio -id < /tmp/initramfs.img +# Manually verify that initrd contains needed binaries and libraries. ------------------------------------------------------------------------ Interestingly enough, if you are running rcutorture, you don't really @@ -39,75 +27,12 @@ with 0755 mode. ------------------------------------------------------------------------ #!/bin/sh -[ -d /dev ] || mkdir -m 0755 /dev -[ -d /root ] || mkdir -m 0700 /root -[ -d /sys ] || mkdir /sys -[ -d /proc ] || mkdir /proc -[ -d /tmp ] || mkdir /tmp -mkdir -p /var/lock -mount -t sysfs -o nodev,noexec,nosuid sysfs /sys -mount -t proc -o nodev,noexec,nosuid proc /proc -# Some things don't work properly without /etc/mtab. -ln -sf /proc/mounts /etc/mtab - -# Note that this only becomes /dev on the real filesystem if udev's scripts -# are used; which they will be, but it's worth pointing out -if ! mount -t devtmpfs -o mode=0755 udev /dev; then - echo "W: devtmpfs not available, falling back to tmpfs for /dev" - mount -t tmpfs -o mode=0755 udev /dev - [ -e /dev/console ] || mknod --mode=600 /dev/console c 5 1 - [ -e /dev/kmsg ] || mknod --mode=644 /dev/kmsg c 1 11 - [ -e /dev/null ] || mknod --mode=666 /dev/null c 1 3 -fi - -mkdir /dev/pts -mount -t devpts -o noexec,nosuid,gid=5,mode=0620 devpts /dev/pts || true -mount -t tmpfs -o "nosuid,size=20%,mode=0755" tmpfs /run -mkdir /run/initramfs -# compatibility symlink for the pre-oneiric locations -ln -s /run/initramfs /dev/.initramfs - -# Export relevant variables -export ROOT= -export ROOTDELAY= -export ROOTFLAGS= -export ROOTFSTYPE= -export IP= -export BOOT= -export BOOTIF= -export UBIMTD= -export break= -export init=/sbin/init -export quiet=n -export readonly=y -export rootmnt=/root -export debug= -export panic= -export blacklist= -export resume= -export resume_offset= -export recovery= - -for i in /sys/devices/system/cpu/cpu*/online -do - case $i in - '/sys/devices/system/cpu/cpu0/online') - ;; - '/sys/devices/system/cpu/cpu*/online') - ;; - *) - echo 1 > $i - ;; - esac -done - while : do sleep 10 done ------------------------------------------------------------------------ -References: -[1]: https://dracut.wiki.kernel.org/index.php/Main_Page -[2]: http://blog.elastocloud.org/2015/06/rapid-linux-kernel-devtest-with-qemu.html -[3]: https://www.centos.org/forums/viewtopic.php?t=51621 +This approach also allows most of the binaries and libraries in the +initrd filesystem to be dispensed with, which can save significant +space in rcutorture's "res" directory. diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h index 891ad13e95b2..d27285f8ee82 100644 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h +++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h @@ -131,8 +131,8 @@ struct hlist_node { * weird ABI and we need to ask it explicitly. * * The alignment is required to guarantee that bits 0 and 1 of @next will be - * clear under normal conditions -- as long as we use call_rcu(), - * call_rcu_bh(), call_rcu_sched(), or call_srcu() to queue callback. 
+ * clear under normal conditions -- as long as we use call_rcu() or + * call_srcu() to queue callback. * * This guarantee is important for few reasons: * - future call_rcu_lazy() will make use of lower bits in the pointer; diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 7cfdfbc910e0..50e25438fb3c 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -196,7 +196,7 @@ void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active) */ static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq) { - DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock)); + lockdep_assert_held(&irq->irq_lock); /* If the interrupt is active, it must stay on the current vcpu */ if (irq->active) @@ -273,7 +273,7 @@ static void vgic_sort_ap_list(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock)); + lockdep_assert_held(&vgic_cpu->ap_list_lock); list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp); } @@ -311,7 +311,7 @@ bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, { struct kvm_vcpu *vcpu; - DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock)); + lockdep_assert_held(&irq->irq_lock); retry: vcpu = vgic_target_oracle(irq); @@ -702,7 +702,7 @@ static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu) static inline void vgic_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) { - DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock)); + lockdep_assert_held(&irq->irq_lock); if (kvm_vgic_global_state.type == VGIC_V2) vgic_v2_populate_lr(vcpu, irq, lr); @@ -736,7 +736,7 @@ static int compute_ap_list_depth(struct kvm_vcpu *vcpu, *multi_sgi = false; - DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock)); + lockdep_assert_held(&vgic_cpu->ap_list_lock); list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { int w; @@ -761,7 +761,7 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) bool multi_sgi; u8 prio = 0xff; - DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock)); + lockdep_assert_held(&vgic_cpu->ap_list_lock); count = compute_ap_list_depth(vcpu, &multi_sgi); if (count > kvm_vgic_global_state.nr_lr || multi_sgi) |
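The vgic.c hunks above replace open-coded DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(...)) checks with lockdep_assert_held(), which verifies that the current context actually holds the lock (spin_is_locked() only reports that somebody does) and compiles away when CONFIG_LOCKDEP is disabled. A hedged sketch of the same pattern, using hypothetical names that are not taken from the patch:

	#include <linux/lockdep.h>
	#include <linux/spinlock.h>

	struct demo_dev {
		spinlock_t lock;
		unsigned int count;
	};

	/* Must be called with dev->lock held; lockdep checks this when
	 * CONFIG_LOCKDEP is enabled and costs nothing otherwise. */
	static void demo_update_locked(struct demo_dev *dev)
	{
		lockdep_assert_held(&dev->lock);
		dev->count++;
	}

	static void demo_update(struct demo_dev *dev)
	{
		spin_lock(&dev->lock);
		demo_update_locked(dev);
		spin_unlock(&dev->lock);
	}
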