241 files changed, 8854 insertions, 3096 deletions
diff --git a/Documentation/ABI/stable/sysfs-driver-mlxreg-io b/Documentation/ABI/stable/sysfs-driver-mlxreg-io
index 12c3f895cd2f..b312242d4f40 100644
--- a/Documentation/ABI/stable/sysfs-driver-mlxreg-io
+++ b/Documentation/ABI/stable/sysfs-driver-mlxreg-io
@@ -467,3 +467,39 @@ Description:	These files provide the maximum powered required for line card
 		feeding and line card configuration Id.
 
 		The files are read only.
+
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/phy_reset
+Date:		May 2022
+KernelVersion:	5.19
+Contact:	Vadim Pasternak <vadimpmellanox.com>
+Description:	This file allows to reset PHY 88E1548 when attribute is set 0
+		due to some abnormal PHY behavior.
+		Expected behavior:
+		When phy_reset is written 1, all PHY 88E1548 are released
+		from the reset state, when 0 - are hold in reset state.
+
+		The files are read/write.
+
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/mac_reset
+Date:		May 2022
+KernelVersion:	5.19
+Contact:	Vadim Pasternak <vadimpmellanox.com>
+Description:	This file allows to reset ASIC MT52132 when attribute is set 0
+		due to some abnormal ASIC behavior.
+		Expected behavior:
+		When mac_reset is written 1, the ASIC MT52132 is released
+		from the reset state, when 0 - is hold in reset state.
+
+		The files are read/write.
+
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/qsfp_pwr_good
+Date:		May 2022
+KernelVersion:	5.19
+Contact:	Vadim Pasternak <vadimpmellanox.com>
+Description:	This file shows QSFP ports power status. The value is set to 0
+		when one of any QSFP ports is plugged. The value is set to 1 when
+		there are no any QSFP ports are plugged.
+		The possible values are:
+		0 - Power good, 1 - Not power good.
+
+		The files are read only.
diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy
index 839fab811b18..db17fc8a0c9f 100644
--- a/Documentation/ABI/testing/ima_policy
+++ b/Documentation/ABI/testing/ima_policy
@@ -27,8 +27,9 @@ Description:
 				[fowner=] [fgroup=]]
 			lsm:	[[subj_user=] [subj_role=] [subj_type=]
 				 [obj_user=] [obj_role=] [obj_type=]]
-			option:	[[appraise_type=]] [template=] [permit_directio]
-				[appraise_flag=] [appraise_algos=] [keyrings=]
+			option:	[digest_type=] [template=] [permit_directio]
+				[appraise_type=] [appraise_flag=]
+				[appraise_algos=] [keyrings=]
 		  base:
 			func:= [BPRM_CHECK][MMAP_CHECK][CREDS_CHECK][FILE_CHECK][MODULE_CHECK]
 				[FIRMWARE_CHECK]
@@ -47,10 +48,21 @@ Description:
 			fgroup:= decimal value
 		  lsm:  are LSM specific
 		  option:
-			appraise_type:= [imasig] [imasig|modsig]
+			appraise_type:= [imasig] | [imasig|modsig] | [sigv3]
+			    where 'imasig' is the original or the signature
+				format v2.
+			    where 'modsig' is an appended signature,
+			    where 'sigv3' is the signature format v3. (Currently
+				limited to fsverity digest based signatures
+				stored in security.ima xattr. Requires
+				specifying "digest_type=verity" first.)
+
 			appraise_flag:= [check_blacklist]
 			Currently, blacklist check is only for files signed with appended
 			signature.
+			digest_type:= verity
+			    Require fs-verity's file digest instead of the
+			    regular IMA file hash.
 			keyrings:= list of keyrings
 			(eg, .builtin_trusted_keys|.ima). Only valid
 			when action is "measure" and func is KEY_CHECK.
@@ -149,3 +161,30 @@ Description:
 		security.ima xattr of a file:
 
 			appraise func=SETXATTR_CHECK appraise_algos=sha256,sha384,sha512
+
+		Example of a 'measure' rule requiring fs-verity's digests
+		with indication of type of digest in the measurement list.
+
+			measure func=FILE_CHECK digest_type=verity \
+				template=ima-ngv2
+
+		Example of 'measure' and 'appraise' rules requiring fs-verity
+		signatures (format version 3) stored in security.ima xattr.
+
+		The 'measure' rule specifies the 'ima-sigv3' template option,
+		which includes the indication of type of digest and the file
+		signature in the measurement list.
+
+			measure func=BPRM_CHECK digest_type=verity \
+				template=ima-sigv3
+
+
+		The 'appraise' rule specifies the type and signature format
+		version (sigv3) required.
+
+			appraise func=BPRM_CHECK digest_type=verity \
+				appraise_type=sigv3
+
+		All of these policy rules could, for example, be constrained
+		either based on a filesystem's UUID (fsuuid) or based on LSM
+		labels.
diff --git a/Documentation/ABI/testing/securityfs-secrets-coco b/Documentation/ABI/testing/securityfs-secrets-coco
new file mode 100644
index 000000000000..f2b6909155f9
--- /dev/null
+++ b/Documentation/ABI/testing/securityfs-secrets-coco
@@ -0,0 +1,51 @@
+What:		security/secrets/coco
+Date:		February 2022
+Contact:	Dov Murik <dovmurik@linux.ibm.com>
+Description:
+		Exposes confidential computing (coco) EFI secrets to
+		userspace via securityfs.
+
+		EFI can declare memory area used by confidential computing
+		platforms (such as AMD SEV and SEV-ES) for secret injection by
+		the Guest Owner during VM's launch.  The secrets are encrypted
+		by the Guest Owner and decrypted inside the trusted enclave,
+		and therefore are not readable by the untrusted host.
+
+		The efi_secret module exposes the secrets to userspace.  Each
+		secret appears as a file under <securityfs>/secrets/coco,
+		where the filename is the GUID of the entry in the secrets
+		table.  This module is loaded automatically by the EFI driver
+		if the EFI secret area is populated.
+
+		Two operations are supported for the files: read and unlink.
+		Reading the file returns the content of secret entry.
+		Unlinking the file overwrites the secret data with zeroes and
+		removes the entry from the filesystem.  A secret cannot be read
+		after it has been unlinked.
+
+		For example, listing the available secrets::
+
+		  # modprobe efi_secret
+		  # ls -l /sys/kernel/security/secrets/coco
+		  -r--r----- 1 root root 0 Jun 28 11:54 736870e5-84f0-4973-92ec-06879ce3da0b
+		  -r--r----- 1 root root 0 Jun 28 11:54 83c83f7f-1356-4975-8b7e-d3a0b54312c6
+		  -r--r----- 1 root root 0 Jun 28 11:54 9553f55d-3da2-43ee-ab5d-ff17f78864d2
+		  -r--r----- 1 root root 0 Jun 28 11:54 e6f5a162-d67f-4750-a67c-5d065f2a9910
+
+		Reading the secret data by reading a file::
+
+		  # cat /sys/kernel/security/secrets/coco/e6f5a162-d67f-4750-a67c-5d065f2a9910
+		  the-content-of-the-secret-data
+
+		Wiping a secret by unlinking a file::
+
+		  # rm /sys/kernel/security/secrets/coco/e6f5a162-d67f-4750-a67c-5d065f2a9910
+		  # ls -l /sys/kernel/security/secrets/coco
+		  -r--r----- 1 root root 0 Jun 28 11:54 736870e5-84f0-4973-92ec-06879ce3da0b
+		  -r--r----- 1 root root 0 Jun 28 11:54 83c83f7f-1356-4975-8b7e-d3a0b54312c6
+		  -r--r----- 1 root root 0 Jun 28 11:54 9553f55d-3da2-43ee-ab5d-ff17f78864d2
+
+		Note: The binary format of the secrets table injected by the
+		Guest Owner is described in
+		drivers/virt/coco/efi_secret/efi_secret.c under "Structure of
+		the EFI secret area".
diff --git a/Documentation/ABI/testing/sysfs-class-regulator b/Documentation/ABI/testing/sysfs-class-regulator
index 8516f08806dd..475b9a372657 100644
--- a/Documentation/ABI/testing/sysfs-class-regulator
+++ b/Documentation/ABI/testing/sysfs-class-regulator
@@ -370,3 +370,84 @@ Description:
 
 		'unknown' means software cannot determine the state, or
 		the reported state is invalid.
+
+What:		/sys/class/regulator/.../under_voltage
+Date:		April 2022
+KernelVersion:	5.18
+Contact:	Zev Weiss <zev@bewilderbeest.net>
+Description:
+		Some regulator directories will contain a field called
+		under_voltage.  This indicates if the device reports an
+		under-voltage fault (1) or not (0).
+
+What:		/sys/class/regulator/.../over_current
+Date:		April 2022
+KernelVersion:	5.18
+Contact:	Zev Weiss <zev@bewilderbeest.net>
+Description:
+		Some regulator directories will contain a field called
+		over_current.  This indicates if the device reports an
+		over-current fault (1) or not (0).
+
+What:		/sys/class/regulator/.../regulation_out
+Date:		April 2022
+KernelVersion:	5.18
+Contact:	Zev Weiss <zev@bewilderbeest.net>
+Description:
+		Some regulator directories will contain a field called
+		regulation_out.  This indicates if the device reports an
+		out-of-regulation fault (1) or not (0).
+
+What:		/sys/class/regulator/.../fail
+Date:		April 2022
+KernelVersion:	5.18
+Contact:	Zev Weiss <zev@bewilderbeest.net>
+Description:
+		Some regulator directories will contain a field called
+		fail.  This indicates if the device reports an output failure
+		(1) or not (0).
+
+What:		/sys/class/regulator/.../over_temp
+Date:		April 2022
+KernelVersion:	5.18
+Contact:	Zev Weiss <zev@bewilderbeest.net>
+Description:
+		Some regulator directories will contain a field called
+		over_temp.  This indicates if the device reports an
+		over-temperature fault (1) or not (0).
+
+What:		/sys/class/regulator/.../under_voltage_warn
+Date:		April 2022
+KernelVersion:	5.18
+Contact:	Zev Weiss <zev@bewilderbeest.net>
+Description:
+		Some regulator directories will contain a field called
+		under_voltage_warn.  This indicates if the device reports an
+		under-voltage warning (1) or not (0).
+
+What:		/sys/class/regulator/.../over_current_warn
+Date:		April 2022
+KernelVersion:	5.18
+Contact:	Zev Weiss <zev@bewilderbeest.net>
+Description:
+		Some regulator directories will contain a field called
+		over_current_warn.  This indicates if the device reports an
+		over-current warning (1) or not (0).
+
+What:		/sys/class/regulator/.../over_voltage_warn
+Date:		April 2022
+KernelVersion:	5.18
+Contact:	Zev Weiss <zev@bewilderbeest.net>
+Description:
+		Some regulator directories will contain a field called
+		over_voltage_warn.  This indicates if the device reports an
+		over-voltage warning (1) or not (0).
+
+What:		/sys/class/regulator/.../over_temp_warn
+Date:		April 2022
+KernelVersion:	5.18
+Contact:	Zev Weiss <zev@bewilderbeest.net>
+Description:
+		Some regulator directories will contain a field called
+		over_temp_warn.  This indicates if the device reports an
+		over-temperature warning (1) or not (0).
diff --git a/Documentation/ABI/testing/sysfs-driver-xen-blkback b/Documentation/ABI/testing/sysfs-driver-xen-blkback
index a74dfe52dd76..7faf719af165 100644
--- a/Documentation/ABI/testing/sysfs-driver-xen-blkback
+++ b/Documentation/ABI/testing/sysfs-driver-xen-blkback
@@ -29,7 +29,7 @@ Description:
 What:           /sys/module/xen_blkback/parameters/buffer_squeeze_duration_ms
 Date:           December 2019
 KernelVersion:  5.6
-Contact:        SeongJae Park <sj@kernel.org>
+Contact:        Maximilian Heyne <mheyne@amazon.de>
 Description:
                 When memory pressure is reported to blkback this option
                 controls the duration in milliseconds that blkback will not
@@ -39,7 +39,7 @@ Description:
 What:           /sys/module/xen_blkback/parameters/feature_persistent
 Date:           September 2020
 KernelVersion:  5.10
-Contact:        SeongJae Park <sj@kernel.org>
+Contact:        Maximilian Heyne <mheyne@amazon.de>
 Description:
                 Whether to enable the persistent grants feature or not.  Note
                 that this option only takes effect on newly created backends.
diff --git a/Documentation/ABI/testing/sysfs-driver-xen-blkfront b/Documentation/ABI/testing/sysfs-driver-xen-blkfront
index 61fd173fabfe..7f646c58832e 100644
--- a/Documentation/ABI/testing/sysfs-driver-xen-blkfront
+++ b/Documentation/ABI/testing/sysfs-driver-xen-blkfront
@@ -12,7 +12,7 @@ Description:
 What:           /sys/module/xen_blkfront/parameters/feature_persistent
 Date:           September 2020
 KernelVersion:  5.10
-Contact:        SeongJae Park <sj@kernel.org>
+Contact:        Maximilian Heyne <mheyne@amazon.de>
 Description:
                 Whether to enable the persistent grants feature or not.  Note
                 that this option only takes effect on newly created frontends.
diff --git a/Documentation/ABI/testing/sysfs-platform-intel-ifs b/Documentation/ABI/testing/sysfs-platform-intel-ifs
new file mode 100644
index 000000000000..486d6d2ff8a0
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-intel-ifs
@@ -0,0 +1,39 @@
+What:		/sys/devices/virtual/misc/intel_ifs_<N>/run_test
+Date:		April 21 2022
+KernelVersion:	5.19
+Contact:	"Jithu Joseph" <jithu.joseph@intel.com>
+Description:	Write <cpu#> to trigger IFS test for one online core.
+		Note that the test is per core. The cpu# can be
+		for any thread on the core. Running on one thread
+		completes the test for the core containing that thread.
+		Example: to test the core containing cpu5: echo 5 >
+		/sys/devices/platform/intel_ifs.<N>/run_test
+
+What:		/sys/devices/virtual/misc/intel_ifs_<N>/status
+Date:		April 21 2022
+KernelVersion:	5.19
+Contact:	"Jithu Joseph" <jithu.joseph@intel.com>
+Description:	The status of the last test. It can be one of "pass", "fail"
+		or "untested".
+
+What:		/sys/devices/virtual/misc/intel_ifs_<N>/details
+Date:		April 21 2022
+KernelVersion:	5.19
+Contact:	"Jithu Joseph" <jithu.joseph@intel.com>
+Description:	Additional information regarding the last test. The details file reports
+		the hex value of the SCAN_STATUS MSR. Note that the error_code field
+		may contain driver defined software code not defined in the Intel SDM.
+
+What:		/sys/devices/virtual/misc/intel_ifs_<N>/image_version
+Date:		April 21 2022
+KernelVersion:	5.19
+Contact:	"Jithu Joseph" <jithu.joseph@intel.com>
+Description:	Version (hexadecimal) of loaded IFS binary image. If no scan image
+		is loaded reports "none".
+
+What:		/sys/devices/virtual/misc/intel_ifs_<N>/reload
+Date:		April 21 2022
+KernelVersion:	5.19
+Contact:	"Jithu Joseph" <jithu.joseph@intel.com>
+Description:	Write "1" (or "y" or "Y") to reload the IFS image from
+		/lib/firmware/intel/ifs/ff-mm-ss.scan.
diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.rst b/Documentation/RCU/Design/Data-Structures/Data-Structures.rst
index f4efd6897b09..b34990c7c377 100644
--- a/Documentation/RCU/Design/Data-Structures/Data-Structures.rst
+++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.rst
@@ -973,7 +973,7 @@ The ``->dynticks`` field counts the corresponding CPU's transitions to
 and from either dyntick-idle or user mode, so that this counter has an
 even value when the CPU is in dyntick-idle mode or user mode and an odd
 value otherwise. The transitions to/from user mode need to be counted
-for user mode adaptive-ticks support (see timers/NO_HZ.txt).
+for user mode adaptive-ticks support (see Documentation/timers/no_hz.rst).
 
 The ``->rcu_need_heavy_qs`` field is used to record the fact that the
 RCU core code would really like to see a quiescent state from the
diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst
index 6f89cf1e567d..c9c957c85bac 100644
--- a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst
+++ b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst
@@ -406,7 +406,7 @@ In earlier implementations, the task requesting the expedited grace
 period also drove it to completion. This straightforward approach had
 the disadvantage of needing to account for POSIX signals sent to user
 tasks, so more recent implemementations use the Linux kernel's
-`workqueues <https://www.kernel.org/doc/Documentation/core-api/workqueue.rst>`__.
+workqueues (see Documentation/core-api/workqueue.rst).
 
 The requesting task still does counter snapshotting and funnel-lock
 processing, but the task reaching the top of the funnel lock does a
diff --git a/Documentation/RCU/Design/Requirements/Requirements.rst b/Documentation/RCU/Design/Requirements/Requirements.rst
index 45278e2974c0..04ed8bf27a0e 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.rst
+++ b/Documentation/RCU/Design/Requirements/Requirements.rst
@@ -370,8 +370,8 @@ pointer fetched by rcu_dereference() may not be used outside of the
 outermost RCU read-side critical section containing that
 rcu_dereference(), unless protection of the corresponding data
 element has been passed from RCU to some other synchronization
-mechanism, most commonly locking or `reference
-counting <https://www.kernel.org/doc/Documentation/RCU/rcuref.txt>`__.
+mechanism, most commonly locking or reference counting
+(see ../../rcuref.rst).
 
 .. |high-quality implementation of C11 memory_order_consume [PDF]| replace:: high-quality implementation of C11 ``memory_order_consume`` [PDF]
 .. _high-quality implementation of C11 memory_order_consume [PDF]: http://www.rdrop.com/users/paulmck/RCU/consume.2015.07.13a.pdf
@@ -2654,6 +2654,38 @@ synchronize_rcu(), and rcu_barrier(), respectively. In
 three APIs are therefore implemented by separate functions that check
 for voluntary context switches.
 
+Tasks Rude RCU
+~~~~~~~~~~~~~~
+
+Some forms of tracing need to wait for all preemption-disabled regions
+of code running on any online CPU, including those executed when RCU is
+not watching.  This means that synchronize_rcu() is insufficient, and
+Tasks Rude RCU must be used instead.  This flavor of RCU does its work by
+forcing a workqueue to be scheduled on each online CPU, hence the "Rude"
+moniker.  And this operation is considered to be quite rude by real-time
+workloads that don't want their ``nohz_full`` CPUs receiving IPIs and
+by battery-powered systems that don't want their idle CPUs to be awakened.
+
+The tasks-rude-RCU API is also reader-marking-free and thus quite compact,
+consisting of call_rcu_tasks_rude(), synchronize_rcu_tasks_rude(),
+and rcu_barrier_tasks_rude().
+
+Tasks Trace RCU
+~~~~~~~~~~~~~~~
+
+Some forms of tracing need to sleep in readers, but cannot tolerate
+SRCU's read-side overhead, which includes a full memory barrier in both
+srcu_read_lock() and srcu_read_unlock().  This need is handled by a
+Tasks Trace RCU that uses scheduler locking and IPIs to synchronize with
+readers.  Real-time systems that cannot tolerate IPIs may build their
+kernels with ``CONFIG_TASKS_TRACE_RCU_READ_MB=y``, which avoids the IPIs at
+the expense of adding full memory barriers to the read-side primitives.
+
+The tasks-trace-RCU API is also reasonably compact,
+consisting of rcu_read_lock_trace(), rcu_read_unlock_trace(),
+rcu_read_lock_trace_held(), call_rcu_tasks_trace(),
+synchronize_rcu_tasks_trace(), and rcu_barrier_tasks_trace().
+
 Possible Future Changes
 -----------------------
 
diff --git a/Documentation/RCU/arrayRCU.rst b/Documentation/RCU/arrayRCU.rst
index 4051ea3871ef..a5f2ff8fc54c 100644
--- a/Documentation/RCU/arrayRCU.rst
+++ b/Documentation/RCU/arrayRCU.rst
@@ -33,8 +33,8 @@ Situation 1: Hash Tables
 
 Hash tables are often implemented as an array, where each array entry
 has a linked-list hash chain.  Each hash chain can be protected by RCU
-as described in the listRCU.txt document.  This approach also applies
-to other array-of-list situations, such as radix trees.
+as described in listRCU.rst.  This approach also applies to other
+array-of-list situations, such as radix trees.
 
 .. _static_arrays:
 
diff --git a/Documentation/RCU/checklist.rst b/Documentation/RCU/checklist.rst
index f4545b7c9a63..42cc5d891bd2 100644
--- a/Documentation/RCU/checklist.rst
+++ b/Documentation/RCU/checklist.rst
@@ -140,8 +140,7 @@ over a rather long period of time, but improvements are always welcome!
 		prevents destructive compiler optimizations.  However,
 		with a bit of devious creativity, it is possible to
 		mishandle the return value from rcu_dereference().
-		Please see rcu_dereference.txt in this directory for
-		more information.
+		Please see rcu_dereference.rst for more information.
 
 		The rcu_dereference() primitive is used by the
 		various "_rcu()" list-traversal primitives, such
@@ -151,7 +150,7 @@ over a rather long period of time, but improvements are always welcome!
 		primitives.  This is particularly useful in code that
 		is common to readers and updaters.  However, lockdep
 		will complain if you access rcu_dereference() outside
-		of an RCU read-side critical section.  See lockdep.txt
+		of an RCU read-side critical section.  See lockdep.rst
 		to learn what to do about this.
 
 		Of course, neither rcu_dereference() nor the "_rcu()"
@@ -323,7 +322,7 @@ over a rather long period of time, but improvements are always welcome!
 	primitives when the update-side lock is held is that doing so
 	can be quite helpful in reducing code bloat when common code is
 	shared between readers and updaters.  Additional primitives
-	are provided for this case, as discussed in lockdep.txt.
+	are provided for this case, as discussed in lockdep.rst.
 
 	One exception to this rule is when data is only ever added to
 	the linked data structure, and is never removed during any
@@ -480,4 +479,4 @@ over a rather long period of time, but improvements are always welcome!
 	both rcu_barrier() and synchronize_rcu(), if necessary, using
 	something like workqueues to to execute them concurrently.
 
-	See rcubarrier.txt for more information.
+	See rcubarrier.rst for more information.
diff --git a/Documentation/RCU/rcu.rst b/Documentation/RCU/rcu.rst
index 0e03c6ef3147..3cfe01ba9a49 100644
--- a/Documentation/RCU/rcu.rst
+++ b/Documentation/RCU/rcu.rst
@@ -10,9 +10,8 @@ A "grace period" must elapse between the two parts, and this grace period
 must be long enough that any readers accessing the item being deleted have
 since dropped their references.  For example, an RCU-protected deletion
 from a linked list would first remove the item from the list, wait for
-a grace period to elapse, then free the element.  See the
-:ref:`Documentation/RCU/listRCU.rst <list_rcu_doc>` for more information on
-using RCU with linked lists.
+a grace period to elapse, then free the element.  See listRCU.rst for more
+information on using RCU with linked lists.
 
 Frequently Asked Questions
 --------------------------
@@ -50,7 +49,7 @@ Frequently Asked Questions
 - If I am running on a uniprocessor kernel, which can only do one
   thing at a time, why should I wait for a grace period?
 
-  See :ref:`Documentation/RCU/UP.rst <up_doc>` for more information.
+  See UP.rst for more information.
 
 - How can I see where RCU is currently used in the Linux kernel?
 
@@ -64,13 +63,13 @@ Frequently Asked Questions
 
 - What guidelines should I follow when writing code that uses RCU?
 
-  See the checklist.txt file in this directory.
+  See checklist.rst.
 
 - Why the name "RCU"?
 
   "RCU" stands for "read-copy update".
-  :ref:`Documentation/RCU/listRCU.rst <list_rcu_doc>` has more information on where
-  this name came from, search for "read-copy update" to find it.
+  listRCU.rst has more information on where this name came from, search
+  for "read-copy update" to find it.
 
 - I hear that RCU is patented?  What is with that?
 
diff --git a/Documentation/RCU/rculist_nulls.rst b/Documentation/RCU/rculist_nulls.rst
index a9fc774bc400..ca4692775ad4 100644
--- a/Documentation/RCU/rculist_nulls.rst
+++ b/Documentation/RCU/rculist_nulls.rst
@@ -8,7 +8,7 @@ This section describes how to use hlist_nulls to
 protect read-mostly linked lists and
 objects using SLAB_TYPESAFE_BY_RCU allocations.
 
-Please read the basics in Documentation/RCU/listRCU.rst
+Please read the basics in listRCU.rst.
 
 Using 'nulls'
 =============
diff --git a/Documentation/RCU/stallwarn.rst b/Documentation/RCU/stallwarn.rst
index 78404625bad2..794837eb519b 100644
--- a/Documentation/RCU/stallwarn.rst
+++ b/Documentation/RCU/stallwarn.rst
@@ -162,6 +162,26 @@ CONFIG_RCU_CPU_STALL_TIMEOUT
 	Stall-warning messages may be enabled and disabled completely via
 	/sys/module/rcupdate/parameters/rcu_cpu_stall_suppress.
 
+CONFIG_RCU_EXP_CPU_STALL_TIMEOUT
+--------------------------------
+
+	Same as the CONFIG_RCU_CPU_STALL_TIMEOUT parameter but only for
+	the expedited grace period. This parameter defines the period
+	of time that RCU will wait from the beginning of an expedited
+	grace period until it issues an RCU CPU stall warning. This time
+	period is normally 20 milliseconds on Android devices.	A zero
+	value causes the CONFIG_RCU_CPU_STALL_TIMEOUT value to be used,
+	after conversion to milliseconds.
+
+	This configuration parameter may be changed at runtime via the
+	/sys/module/rcupdate/parameters/rcu_exp_cpu_stall_timeout, however
+	this parameter is checked only at the beginning of a cycle. If you
+	are in a current stall cycle, setting it to a new value will change
+	the timeout for the -next- stall.
+
+	Stall-warning messages may be enabled and disabled completely via
+	/sys/module/rcupdate/parameters/rcu_cpu_stall_suppress.
+
 RCU_STALL_DELAY_DELTA
 ---------------------
 
diff --git a/Documentation/RCU/whatisRCU.rst b/Documentation/RCU/whatisRCU.rst
index c34d2212eaca..77ea260efd12 100644
--- a/Documentation/RCU/whatisRCU.rst
+++ b/Documentation/RCU/whatisRCU.rst
@@ -224,7 +224,7 @@ synchronize_rcu()
 	be delayed.  This property results in system resilience in face
 	of denial-of-service attacks.  Code using call_rcu() should limit
 	update rate in order to gain this same sort of resilience.  See
-	checklist.txt for some approaches to limiting the update rate.
+	checklist.rst for some approaches to limiting the update rate.
 
 rcu_assign_pointer()
 ^^^^^^^^^^^^^^^^^^^^
@@ -318,7 +318,7 @@ rcu_dereference()
 	must prohibit.	The rcu_dereference_protected() variant takes
 	a lockdep expression to indicate which locks must be acquired
 	by the caller. If the indicated protection is not provided,
-	a lockdep splat is emitted.  See Documentation/RCU/Design/Requirements/Requirements.rst
+	a lockdep splat is emitted.  See Design/Requirements/Requirements.rst
 	and the API's code comments for more details and example usage.
 
 .. 	[2] If the list_for_each_entry_rcu() instance might be used by
@@ -399,8 +399,7 @@ for specialized uses, but are relatively uncommon.
 
 This section shows a simple use of the core RCU API to protect a
 global pointer to a dynamically allocated structure.  More-typical
-uses of RCU may be found in :ref:`listRCU.rst <list_rcu_doc>`,
-:ref:`arrayRCU.rst <array_rcu_doc>`, and :ref:`NMI-RCU.rst <NMI_rcu_doc>`.
+uses of RCU may be found in listRCU.rst, arrayRCU.rst, and NMI-RCU.rst.
 ::
 
 	struct foo {
@@ -482,10 +481,9 @@ So, to sum up:
 	RCU read-side critical sections that might be referencing that
 	data item.
 
-See checklist.txt for additional rules to follow when using RCU.
-And again, more-typical uses of RCU may be found in :ref:`listRCU.rst
-<list_rcu_doc>`, :ref:`arrayRCU.rst <array_rcu_doc>`, and :ref:`NMI-RCU.rst
-<NMI_rcu_doc>`.
+See checklist.rst for additional rules to follow when using RCU.
+And again, more-typical uses of RCU may be found in listRCU.rst,
+arrayRCU.rst, and NMI-RCU.rst.
 
 .. _4_whatisRCU:
 
@@ -579,7 +577,7 @@ to avoid having to write your own callback::
 
 	kfree_rcu(old_fp, rcu);
 
-Again, see checklist.txt for additional rules governing the use of RCU.
+Again, see checklist.rst for additional rules governing the use of RCU.
 
 .. _5_whatisRCU:
 
@@ -663,7 +661,7 @@ been able to write-acquire the lock otherwise.  The smp_mb__after_spinlock()
 promotes synchronize_rcu() to a full memory barrier in compliance with
 the "Memory-Barrier Guarantees" listed in:
 
-	Documentation/RCU/Design/Requirements/Requirements.rst
+	Design/Requirements/Requirements.rst
 
 It is possible to nest rcu_read_lock(), since reader-writer locks may
 be recursively acquired.  Note also that rcu_read_lock() is immune
diff --git a/Documentation/accounting/psi.rst b/Documentation/accounting/psi.rst
index 860fe651d645..5e40b3f437f9 100644
--- a/Documentation/accounting/psi.rst
+++ b/Documentation/accounting/psi.rst
@@ -37,11 +37,7 @@ Pressure interface
 Pressure information for each resource is exported through the
 respective file in /proc/pressure/ -- cpu, memory, and io.
 
-The format for CPU is as such::
-
-	some avg10=0.00 avg60=0.00 avg300=0.00 total=0
-
-and for memory and IO::
+The format is as such::
 
 	some avg10=0.00 avg60=0.00 avg300=0.00 total=0
 	full avg10=0.00 avg60=0.00 avg300=0.00 total=0
@@ -58,6 +54,9 @@ situation from a state where some tasks are stalled but the CPU is
 still doing productive work. As such, time spent in this subset of the
 stall state is tracked separately and exported in the "full" averages.
 
+CPU full is undefined at the system level, but has been reported
+since 5.13, so it is set to zero for backward compatibility.
+
 The ratios (in %) are tracked as recent trends over ten, sixty, and
 three hundred second windows, which gives insight into short term events
 as well as medium and long term trends. The total absolute stall time
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 69d7a6983f78..38aa01939e1e 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1881,7 +1881,7 @@ IO Latency Interface Files
   io.latency
 	This takes a similar format as the other controllers.
 
-		"MAJOR:MINOR target=<target time in microseconds"
+		"MAJOR:MINOR target=<target time in microseconds>"
 
   io.stat
 	If the controller is enabled you will see extra stats in io.stat in
diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst
index 01ba293a2d70..959f73a32712 100644
--- a/Documentation/admin-guide/kernel-parameters.rst
+++ b/Documentation/admin-guide/kernel-parameters.rst
@@ -99,6 +99,7 @@ parameter is applicable::
 	ALSA	ALSA sound support is enabled.
 	APIC	APIC support is enabled.
 	APM	Advanced Power Management support is enabled.
+	APPARMOR AppArmor support is enabled.
 	ARM	ARM architecture is enabled.
 	ARM64	ARM64 architecture is enabled.
 	AX25	Appropriate AX.25 support is enabled.
@@ -108,15 +109,15 @@ parameter is applicable::
 	DYNAMIC_DEBUG Build in debug messages and enable them at runtime
 	EDD	BIOS Enhanced Disk Drive Services (EDD) is enabled
 	EFI	EFI Partitioning (GPT) is enabled
-	EIDE	EIDE/ATAPI support is enabled.
 	EVM	Extended Verification Module
 	FB	The frame buffer device is enabled.
 	FTRACE	Function tracing enabled.
 	GCOV	GCOV profiling is enabled.
+	HIBERNATION HIBERNATION is enabled.
 	HW	Appropriate hardware is enabled.
+	HYPER_V HYPERV support is enabled.
 	IA-64	IA-64 architecture is enabled.
 	IMA     Integrity measurement architecture is enabled.
-	IOSCHED	More than one I/O scheduler is enabled.
 	IP_PNP	IP DHCP, BOOTP, or RARP is enabled.
 	IPV6	IPv6 support is enabled.
 	ISAPNP	ISA PnP code is enabled.
@@ -140,7 +141,6 @@ parameter is applicable::
 	NUMA	NUMA support is enabled.
 	NFS	Appropriate NFS support is enabled.
 	OF	Devicetree is enabled.
-	OSS	OSS sound support is enabled.
 	PV_OPS	A paravirtualized kernel is enabled.
 	PARIDE	The ParIDE (parallel port IDE) subsystem is enabled.
 	PARISC	The PA-RISC architecture is enabled.
@@ -160,7 +160,6 @@ parameter is applicable::
 			the Documentation/scsi/ sub-directory.
 	SECURITY Different security models are enabled.
 	SELINUX SELinux support is enabled.
-	APPARMOR AppArmor support is enabled.
 	SERIAL	Serial support is enabled.
 	SH	SuperH architecture is enabled.
 	SMP	The kernel is an SMP kernel.
@@ -168,7 +167,6 @@ parameter is applicable::
 	SWSUSP	Software suspend (hibernation) is enabled.
 	SUSPEND	System suspend states are enabled.
 	TPM	TPM drivers are enabled.
-	TS	Appropriate touchscreen support is enabled.
 	UMS	USB Mass Storage support is enabled.
 	USB	USB support is enabled.
 	USBHID	USB Human Interface Device support is enabled.
@@ -177,7 +175,6 @@ parameter is applicable::
 	VGA	The VGA console has been enabled.
 	VT	Virtual terminal support is enabled.
 	WDT	Watchdog support is enabled.
-	XT	IBM PC/XT MFM hard disk support is enabled.
 	X86-32	X86-32, aka i386 architecture is enabled.
 	X86-64	X86-64 architecture is enabled.
 			More X86-64 boot options can be found in
@@ -211,7 +208,7 @@ The number of kernel parameters is not limited, but the length of the
 complete command line (parameters including spaces etc.) is limited to
 a fixed number of characters. This limit depends on the architecture
 and is between 256 and 4096 characters. It is defined in the file
-./include/asm/setup.h as COMMAND_LINE_SIZE.
+./include/uapi/asm-generic/setup.h as COMMAND_LINE_SIZE.
 
 Finally, the [KMG] suffix is commonly described after a number of kernel
 parameter values. These 'K', 'M', and 'G' letters represent the _binary_
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 3f1cc5e317ed..a9066cfb85a0 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -461,6 +461,12 @@
 			Format: <io>,<irq>,<mode>
 			See header of drivers/net/hamradio/baycom_ser_hdx.c.
 
+	bert_disable	[ACPI]
+			Disable BERT OS support on buggy BIOSes.
+
+	bgrt_disable	[ACPI][X86]
+			Disable BGRT to avoid flickering OEM logo.
+
 	blkdevparts=	Manual partition parsing of block device(s) for
 			embedded devices based on command line input.
 			See Documentation/block/cmdline-partition.rst
@@ -476,12 +482,6 @@
 
 			See Documentation/admin-guide/bootconfig.rst
 
-	bert_disable	[ACPI]
-			Disable BERT OS support on buggy BIOSes.
-
-	bgrt_disable	[ACPI][X86]
-			Disable BGRT to avoid flickering OEM logo.
-
 	bttv.card=	[HW,V4L] bttv (bt848 + bt878 based grabber cards)
 	bttv.radio=	Most important insmod options are available as
 			kernel args too.
@@ -563,6 +563,25 @@
 
 	cio_ignore=	[S390]
 			See Documentation/s390/common_io.rst for details.
+
+	clearcpuid=X[,X...] [X86]
+			Disable CPUID feature X for the kernel. See
+			arch/x86/include/asm/cpufeatures.h for the valid bit
+			numbers X. Note the Linux-specific bits are not necessarily
+			stable over kernel options, but the vendor-specific
+			ones should be.
+			X can also be a string as appearing in the flags: line
+			in /proc/cpuinfo which does not have the above
+			instability issue. However, not all features have names
+			in /proc/cpuinfo.
+			Note that using this option will taint your kernel.
+			Also note that user programs calling CPUID directly
+			or using the feature without checking anything
+			will still see it. This just prevents it from
+			being used by the kernel or shown in /proc/cpuinfo.
+			Also note the kernel might malfunction if you disable
+			some critical bits.
+
 	clk_ignore_unused
 			[CLK]
 			Prevents the clock framework from automatically gating
@@ -631,19 +650,6 @@
 			Defaults to zero when built as a module and to
 			10 seconds when built into the kernel.
 
-	clearcpuid=BITNUM[,BITNUM...] [X86]
-			Disable CPUID feature X for the kernel. See
-			arch/x86/include/asm/cpufeatures.h for the valid bit
-			numbers. Note the Linux specific bits are not necessarily
-			stable over kernel options, but the vendor specific
-			ones should be.
-			Also note that user programs calling CPUID directly
-			or using the feature without checking anything
-			will still see it. This just prevents it from
-			being used by the kernel or shown in /proc/cpuinfo.
-			Also note the kernel might malfunction if you disable
-			some critical bits.
-
 	cma=nn[MG]@[start[MG][-end[MG]]]
 			[KNL,CMA]
 			Sets the size of kernel global memory area for
@@ -765,6 +771,24 @@
 			0: default value, disable debugging
 			1: enable debugging at boot time
 
+	cpcihp_generic=	[HW,PCI] Generic port I/O CompactPCI driver
+			Format:
+			<first_slot>,<last_slot>,<port>,<enum_bit>[,<debug>]
+
+	cpu0_hotplug	[X86] Turn on CPU0 hotplug feature when
+			CONFIG_BOOTPARAM_HOTPLUG_CPU0 is off.
+			Some features depend on CPU0. Known dependencies are:
+			1. Resume from suspend/hibernate depends on CPU0.
+			Suspend/hibernate will fail if CPU0 is offline and you
+			need to online CPU0 before suspend/hibernate.
+			2. PIC interrupts also depend on CPU0. CPU0 can't be
+			removed if a PIC interrupt is detected.
+			It's said poweroff/reboot may depend on CPU0 on some
+			machines although I haven't seen such issues so far
+			after CPU0 is offline on a few tested machines.
+			If the dependencies are under your control, you can
+			turn on cpu0_hotplug.
+
 	cpuidle.off=1	[CPU_IDLE]
 			disable the cpuidle sub-system
 
@@ -785,9 +809,13 @@
 			on every CPU online, such as boot, and resume from suspend.
 			Default: 10000
 
-	cpcihp_generic=	[HW,PCI] Generic port I/O CompactPCI driver
-			Format:
-			<first_slot>,<last_slot>,<port>,<enum_bit>[,<debug>]
+	crash_kexec_post_notifiers
+			Run kdump after running panic-notifiers and dumping
+			kmsg. This only for the users who doubt kdump always
+			succeeds in any situation.
+			Note that this also increases risks of kdump failure,
+			because some panic notifiers can make the crashed
+			kernel more unstable.
 
 	crashkernel=size[KMG][@offset[KMG]]
 			[KNL] Using kexec, Linux can switch to a 'crash kernel'
@@ -808,7 +836,7 @@
 			Documentation/admin-guide/kdump/kdump.rst for an example.
 
 	crashkernel=size[KMG],high
-			[KNL, X86-64] range could be above 4G. Allow kernel
+			[KNL, X86-64, ARM64] range could be above 4G. Allow kernel
 			to allocate physical memory region from top, so could
 			be above 4G if system have more than 4G ram installed.
 			Otherwise memory region will be allocated below 4G, if
@@ -821,14 +849,20 @@
 			that require some amount of low memory, e.g. swiotlb
 			requires at least 64M+32K low memory, also enough extra
 			low memory is needed to make sure DMA buffers for 32-bit
-			devices won't run out. Kernel would try to allocate at
+			devices won't run out. Kernel would try to allocate
 			at least 256M below 4G automatically.
-			This one let user to specify own low range under 4G
+			This one lets the user specify own low range under 4G
 			for second kernel instead.
 			0: to disable low allocation.
 			It will be ignored when crashkernel=X,high is not used
 			or memory reserved is below 4G.
 
+			[KNL, ARM64] range in low memory.
+			This one lets the user specify a low range in the
+			DMA zone for the crash dump kernel.
+			It will be ignored when crashkernel=X,high is not used
+			or memory reserved is located in the DMA zones.
+
 	cryptomgr.notests
 			[KNL] Disable crypto self-tests
 
@@ -950,6 +984,8 @@
 			dump out devices still on the deferred probe list after
 			retrying.
 
+	delayacct	[KNL] Enable per-task delay accounting
+
 	dell_smm_hwmon.ignore_dmi=
 			[HW] Continue probing hardware even if DMI data
 			indicates that the driver is running on unsupported
@@ -1003,17 +1039,6 @@
 	disable=	[IPV6]
 			See Documentation/networking/ipv6.rst.
 
-	hardened_usercopy=
-                        [KNL] Under CONFIG_HARDENED_USERCOPY, whether
-                        hardening is enabled for this boot. Hardened
-                        usercopy checking is used to protect the kernel
-                        from reading or writing beyond known memory
-                        allocation boundaries as a proactive defense
-                        against bounds-checking flaws in the kernel's
-                        copy_to_user()/copy_from_user() interface.
-                on      Perform hardened usercopy checks (default).
-                off     Disable hardened usercopy checks.
-
 	disable_radix	[PPC]
 			Disable RADIX MMU mode on POWER9
 
@@ -1282,7 +1307,7 @@
 			Append ",keep" to not disable it when the real console
 			takes over.
 
-			Only one of vga, efi, serial, or usb debug port can
+			Only one of vga, serial, or usb debug port can
 			be used at a time.
 
 			Currently only ttyS0 and ttyS1 may be specified by
@@ -1297,7 +1322,7 @@
 			Interaction with the standard serial driver is not
 			very good.
 
-			The VGA and EFI output is eventually overwritten by
+			The VGA output is eventually overwritten by
 			the real console.
 
 			The xen option can only be used in Xen domains.
@@ -1316,17 +1341,6 @@
 			force: enforce the use of EDAC to report H/W event.
 			default: on.
 
-	ekgdboc=	[X86,KGDB] Allow early kernel console debugging
-			ekgdboc=kbd
-
-			This is designed to be used in conjunction with
-			the boot argument: earlyprintk=vga
-
-			This parameter works in place of the kgdboc parameter
-			but can only be used if the backing tty is available
-			very early in the boot process. For early debugging
-			via a serial port see kgdboc_earlycon instead.
-
 	edd=		[EDD]
 			Format: {"off" | "on" | "skip[mbr]"}
 
@@ -1388,6 +1402,17 @@
 	eisa_irq_edge=	[PARISC,HW]
 			See header of drivers/parisc/eisa.c.
 
+	ekgdboc=	[X86,KGDB] Allow early kernel console debugging
+			Format: ekgdboc=kbd
+
+			This is designed to be used in conjunction with
+			the boot argument: earlyprintk=vga
+
+			This parameter works in place of the kgdboc parameter
+			but can only be used if the backing tty is available
+			very early in the boot process. For early debugging
+			via a serial port see kgdboc_earlycon instead.
+
 	elanfreq=	[X86-32]
 			See comment before function elanfreq_setup() in
 			arch/x86/kernel/cpu/cpufreq/elanfreq.c.
@@ -1586,6 +1611,17 @@
 			Format: <unsigned int> such that (rxsize & ~0x1fffc0) == 0.
 			Default: 1024
 
+	hardened_usercopy=
+			[KNL] Under CONFIG_HARDENED_USERCOPY, whether
+			hardening is enabled for this boot. Hardened
+			usercopy checking is used to protect the kernel
+			from reading or writing beyond known memory
+			allocation boundaries as a proactive defense
+			against bounds-checking flaws in the kernel's
+			copy_to_user()/copy_from_user() interface.
+		on	Perform hardened usercopy checks (default).
+		off	Disable hardened usercopy checks.
+
 	hardlockup_all_cpu_backtrace=
 			[KNL] Should the hard-lockup detector generate
 			backtraces on all cpus.
@@ -1606,6 +1642,15 @@
 			corresponding firmware-first mode error processing
 			logic will be disabled.
 
+	hibernate=	[HIBERNATION]
+		noresume	Don't check if there's a hibernation image
+				present during boot.
+		nocompress	Don't compress/decompress hibernation images.
+		no		Disable hibernation and resume.
+		protect_image	Turn on image protection during restoration
+				(that will set all pages holding image data
+				during restoration read-only).
+
 	highmem=nn[KMG]	[KNL,BOOT] forces the highmem zone to have an exact
 			size of <nn>. This works even on boxes that have no
 			highmem otherwise. This also works to reduce highmem
@@ -1628,16 +1673,6 @@
 	hpet_mmap=	[X86, HPET_MMAP] Allow userspace to mmap HPET
 			registers.  Default set by CONFIG_HPET_MMAP_DEFAULT.
 
-	hugetlb_cma=	[HW,CMA] The size of a CMA area used for allocation
-			of gigantic hugepages. Or using node format, the size
-			of a CMA area per node can be specified.
-			Format: nn[KMGTPE] or (node format)
-				<node>:nn[KMGTPE][,<node>:nn[KMGTPE]]
-
-			Reserve a CMA area of given size and allocate gigantic
-			hugepages using the CMA allocator. If enabled, the
-			boot-time allocation of gigantic hugepages is skipped.
-
 	hugepages=	[HW] Number of HugeTLB pages to allocate at boot.
 			If this follows hugepagesz (below), it specifies
 			the number of pages of hugepagesz to be allocated.
@@ -1659,6 +1694,16 @@
 			Documentation/admin-guide/mm/hugetlbpage.rst.
 			Format: size[KMG]
 
+	hugetlb_cma=	[HW,CMA] The size of a CMA area used for allocation
+			of gigantic hugepages. Or using node format, the size
+			of a CMA area per node can be specified.
+			Format: nn[KMGTPE] or (node format)
+				<node>:nn[KMGTPE][,<node>:nn[KMGTPE]]
+
+			Reserve a CMA area of given size and allocate gigantic
+			hugepages using the CMA allocator. If enabled, the
+			boot-time allocation of gigantic hugepages is skipped.
+
 	hugetlb_free_vmemmap=
 			[KNL] Reguires CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
 			enabled.
@@ -1758,26 +1803,6 @@
 	icn=		[HW,ISDN]
 			Format: <io>[,<membase>[,<icn_id>[,<icn_id2>]]]
 
-	ide-core.nodma=	[HW] (E)IDE subsystem
-			Format: =0.0 to prevent dma on hda, =0.1 hdb =1.0 hdc
-			.vlb_clock .pci_clock .noflush .nohpa .noprobe .nowerr
-			.cdrom .chs .ignore_cable are additional options
-			See Documentation/ide/ide.rst.
-
-	ide-generic.probe-mask= [HW] (E)IDE subsystem
-			Format: <int>
-			Probe mask for legacy ISA IDE ports.  Depending on
-			platform up to 6 ports are supported, enabled by
-			setting corresponding bits in the mask to 1.  The
-			default value is 0x0, which has a special meaning.
-			On systems that have PCI, it triggers scanning the
-			PCI bus for the first and the second port, which
-			are then probed.  On systems without PCI the value
-			of 0x0 enables probing the two first ports as if it
-			was 0x3.
-
-	ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem
-			Claim all unknown PCI IDE storage controllers.
 
 	idle=		[X86]
 			Format: idle=poll, idle=halt, idle=nomwait
@@ -1903,7 +1928,8 @@
 
 	ima_template=	[IMA]
 			Select one of defined IMA measurements template formats.
-			Formats: { "ima" | "ima-ng" | "ima-sig" }
+			Formats: { "ima" | "ima-ng" | "ima-ngv2" | "ima-sig" |
+				   "ima-sigv2" }
 			Default: "ima-ng"
 
 	ima_template_fmt=
@@ -2622,14 +2648,14 @@
 			when set.
 			Format: <int>
 
-	libata.force=	[LIBATA] Force configurations.  The format is comma-
-			separated list of "[ID:]VAL" where ID is
-			PORT[.DEVICE].  PORT and DEVICE are decimal numbers
-			matching port, link or device.  Basically, it matches
-			the ATA ID string printed on console by libata.  If
-			the whole ID part is omitted, the last PORT and DEVICE
-			values are used.  If ID hasn't been specified yet, the
-			configuration applies to all ports, links and devices.
+	libata.force=	[LIBATA] Force configurations.  The format is a comma-
+			separated list of "[ID:]VAL" where ID is PORT[.DEVICE].
+			PORT and DEVICE are decimal numbers matching port, link
+			or device.  Basically, it matches the ATA ID string
+			printed on console by libata.  If the whole ID part is
+			omitted, the last PORT and DEVICE values are used.  If
+			ID hasn't been specified yet, the configuration applies
+			to all ports, links and devices.
 
 			If only DEVICE is omitted, the parameter applies to
 			the port and all links and devices behind it.  DEVICE
@@ -2639,7 +2665,7 @@
 			host link and device attached to it.
 
 			The VAL specifies the configuration to force.  As long
-			as there's no ambiguity shortcut notation is allowed.
+			as there is no ambiguity, shortcut notation is allowed.
 			For example, both 1.5 and 1.5G would work for 1.5Gbps.
 			The following configurations can be forced.
 
@@ -2652,27 +2678,64 @@
 			  udma[/][16,25,33,44,66,100,133] notation is also
 			  allowed.
 
+			* nohrst, nosrst, norst: suppress hard, soft and both
+			  resets.
+
+			* rstonce: only attempt one reset during hot-unplug
+			  link recovery.
+
+			* [no]dbdelay: Enable or disable the extra 200ms delay
+			  before debouncing a link PHY and device presence
+			  detection.
+
 			* [no]ncq: Turn on or off NCQ.
 
-			* [no]ncqtrim: Turn off queued DSM TRIM.
+			* [no]ncqtrim: Enable or disable queued DSM TRIM.
+
+			* [no]ncqati: Enable or disable NCQ trim on ATI chipset.
+
+			* [no]trim: Enable or disable (unqueued) TRIM.
+
+			* trim_zero: Indicate that TRIM command zeroes data.
+
+			* max_trim_128m: Set 128M maximum trim size limit.
+
+			* [no]dma: Turn on or off DMA transfers.
 
-			* nohrst, nosrst, norst: suppress hard, soft
-			  and both resets.
+			* atapi_dmadir: Enable ATAPI DMADIR bridge support.
 
-			* rstonce: only attempt one reset during
-			  hot-unplug link recovery
+			* atapi_mod16_dma: Enable the use of ATAPI DMA for
+			  commands that are not a multiple of 16 bytes.
 
-			* dump_id: dump IDENTIFY data.
+			* [no]dmalog: Enable or disable the use of the
+			  READ LOG DMA EXT command to access logs.
 
-			* atapi_dmadir: Enable ATAPI DMADIR bridge support
+			* [no]iddevlog: Enable or disable access to the
+			  identify device data log.
+
+			* [no]logdir: Enable or disable access to the general
+			  purpose log directory.
+
+			* max_sec_128: Set transfer size limit to 128 sectors.
+
+			* max_sec_1024: Set or clear transfer size limit to
+			  1024 sectors.
+
+			* max_sec_lba48: Set or clear transfer size limit to
+			  65535 sectors.
+
+			* [no]lpm: Enable or disable link power management.
+
+			* [no]setxfer: Indicate if transfer speed mode setting
+			  should be skipped.
+
+			* dump_id: Dump IDENTIFY data.
 
 			* disable: Disable this device.
 
 			If there are multiple matching configurations changing
 			the same attribute, the last one is used.
 
-	memblock=debug	[KNL] Enable memblock debug messages.
-
 	load_ramdisk=	[RAM] [Deprecated]
 
 	lockd.nlm_grace_period=P  [NFS] Assign grace period.
@@ -2814,7 +2877,7 @@
 			different yeeloong laptops.
 			Example: machtype=lemote-yeeloong-2f-7inch
 
-	max_addr=nn[KMG]	[KNL,BOOT,ia64] All physical memory greater
+	max_addr=nn[KMG]	[KNL,BOOT,IA-64] All physical memory greater
 			than or equal to this physical address is ignored.
 
 	maxcpus=	[SMP] Maximum number of processors that	an SMP kernel
@@ -2914,6 +2977,8 @@
 	mem=nopentium	[BUGS=X86-32] Disable usage of 4MB pages for kernel
 			memory.
 
+	memblock=debug	[KNL] Enable memblock debug messages.
+
 	memchunk=nn[KMG]
 			[KNL,SH] Allow user to override the default size for
 			per-device physically contiguous DMA buffers.
@@ -3057,7 +3122,7 @@
 
 	mga=		[HW,DRM]
 
-	min_addr=nn[KMG]	[KNL,BOOT,ia64] All physical memory below this
+	min_addr=nn[KMG]	[KNL,BOOT,IA-64] All physical memory below this
 			physical address is ignored.
 
 	mini2440=	[ARM,HW,KNL]
@@ -3103,6 +3168,7 @@
 					       mds=off [X86]
 					       tsx_async_abort=off [X86]
 					       kvm.nx_huge_pages=off [X86]
+					       srbds=off [X86,INTEL]
 					       no_entry_flush [PPC]
 					       no_uaccess_flush [PPC]
 
@@ -3181,20 +3247,6 @@
 	mtdparts=	[MTD]
 			See drivers/mtd/parsers/cmdlinepart.c
 
-	multitce=off	[PPC]  This parameter disables the use of the pSeries
-			firmware feature for updating multiple TCE entries
-			at a time.
-
-	onenand.bdry=	[HW,MTD] Flex-OneNAND Boundary Configuration
-
-			Format: [die0_boundary][,die0_lock][,die1_boundary][,die1_lock]
-
-			boundary - index of last SLC block on Flex-OneNAND.
-				   The remaining blocks are configured as MLC blocks.
-			lock	 - Configure if Flex-OneNAND boundary should be locked.
-				   Once locked, the boundary cannot be changed.
-				   1 indicates lock status, 0 indicates unlock status.
-
 	mtdset=		[ARM]
 			ARM/S3C2412 JIVE boot control
 
@@ -3221,6 +3273,10 @@
 			Used for mtrr cleanup. It is spare mtrr entries number.
 			Set to 2 or more if your graphical card needs more.
 
+	multitce=off	[PPC]  This parameter disables the use of the pSeries
+			firmware feature for updating multiple TCE entries
+			at a time.
+
 	n2=		[NET] SDL Inc. RISCom/N2 synchronous serial card
 
 	netdev=		[NET] Network devices parameters
@@ -3230,6 +3286,11 @@
 			This usage is only documented in each driver source
 			file if at all.
 
+	netpoll.carrier_timeout=
+			[NET] Specifies amount of time (in seconds) that
+			netpoll should wait for a carrier. By default netpoll
+			waits 4 seconds.
+
 	nf_conntrack.acct=
 			[NETFILTER] Enable connection tracking flow accounting
 			0 to disable accounting
@@ -3380,11 +3441,6 @@
 			These settings can be accessed at runtime via
 			the nmi_watchdog and hardlockup_panic sysctls.
 
-	netpoll.carrier_timeout=
-			[NET] Specifies amount of time (in seconds) that
-			netpoll should wait for a carrier. By default netpoll
-			waits 4 seconds.
-
 	no387		[BUGS=X86-32] Tells the kernel to use the 387 maths
 			emulation library even if a 387 maths coprocessor
 			is present.
@@ -3439,10 +3495,6 @@
 
 	nocache		[ARM]
 
-	noclflush	[BUGS=X86] Don't use the CLFLUSH instruction
-
-	delayacct	[KNL] Enable per-task delay accounting
-
 	nodsp		[SH] Disable hardware DSP at boot time.
 
 	noefi		Disable EFI runtime services support.
@@ -3451,16 +3503,11 @@
 
 	noexec		[IA-64]
 
-	noexec		[X86]
-			On X86-32 available only on PAE configured kernels.
-			noexec=on: enable non-executable mappings (default)
-			noexec=off: disable non-executable mappings
-
-	nosmap		[X86,PPC]
+	nosmap		[PPC]
 			Disable SMAP (Supervisor Mode Access Prevention)
 			even if it is supported by processor.
 
-	nosmep		[X86,PPC64s]
+	nosmep		[PPC64s]
 			Disable SMEP (Supervisor Mode Execution Prevention)
 			even if it is supported by processor.
 
@@ -3660,8 +3707,6 @@
 
 	nosbagart	[IA-64]
 
-	nosep		[BUGS=X86-32] Disables x86 SYSENTER/SYSEXIT support.
-
 	nosgx		[X86-64,SGX] Disables Intel SGX kernel support.
 
 	nosmp		[SMP] Tells an SMP kernel to act as a UP kernel,
@@ -3678,20 +3723,6 @@
 
 	nox2apic	[X86-64,APIC] Do not enable x2APIC mode.
 
-	cpu0_hotplug	[X86] Turn on CPU0 hotplug feature when
-			CONFIG_BOOTPARAM_HOTPLUG_CPU0 is off.
-			Some features depend on CPU0. Known dependencies are:
-			1. Resume from suspend/hibernate depends on CPU0.
-			Suspend/hibernate will fail if CPU0 is offline and you
-			need to online CPU0 before suspend/hibernate.
-			2. PIC interrupts also depend on CPU0. CPU0 can't be
-			removed if a PIC interrupt is detected.
-			It's said poweroff/reboot may depend on CPU0 on some
-			machines although I haven't seen such issues so far
-			after CPU0 is offline on a few tested machines.
-			If the dependencies are under your control, you can
-			turn on cpu0_hotplug.
-
 	nps_mtm_hs_ctr=	[KNL,ARC]
 			This parameter sets the maximum duration, in
 			cycles, each HW thread of the CTOP can run
@@ -3744,6 +3775,16 @@
 			For example, to override I2C bus2:
 			omap_mux=i2c2_scl.i2c2_scl=0x100,i2c2_sda.i2c2_sda=0x100
 
+	onenand.bdry=	[HW,MTD] Flex-OneNAND Boundary Configuration
+
+			Format: [die0_boundary][,die0_lock][,die1_boundary][,die1_lock]
+
+			boundary - index of last SLC block on Flex-OneNAND.
+				   The remaining blocks are configured as MLC blocks.
+			lock	 - Configure if Flex-OneNAND boundary should be locked.
+				   Once locked, the boundary cannot be changed.
+				   1 indicates lock status, 0 indicates unlock status.
+
 	oops=panic	Always panic on oopses. Default is to just kill the
 			process, but there is a small probability of
 			deadlocking the machine.
@@ -3814,14 +3855,6 @@
 	panic_on_warn	panic() instead of WARN().  Useful to cause kdump
 			on a WARN().
 
-	crash_kexec_post_notifiers
-			Run kdump after running panic-notifiers and dumping
-			kmsg. This only for the users who doubt kdump always
-			succeeds in any situation.
-			Note that this also increases risks of kdump failure,
-			because some panic notifiers can make the crashed
-			kernel more unstable.
-
 	parkbd.port=	[HW] Parallel port number the keyboard adapter is
 			connected to, default is 0.
 			Format: <parport#>
@@ -4893,6 +4926,18 @@
 
 	rcupdate.rcu_cpu_stall_timeout= [KNL]
 			Set timeout for RCU CPU stall warning messages.
+			The value is in seconds and the maximum allowed
+			value is 300 seconds.
+
+	rcupdate.rcu_exp_cpu_stall_timeout= [KNL]
+			Set timeout for expedited RCU CPU stall warning
+			messages.  The value is in milliseconds
+			and the maximum allowed value is 21000
+			milliseconds. Please note that this value is
+			adjusted to an arch timer tick resolution.
+			Setting this to zero causes the value from
+			rcupdate.rcu_cpu_stall_timeout to be used (after
+			conversion from seconds to milliseconds).
 
 	rcupdate.rcu_expedited= [KNL]
 			Use expedited grace-period primitives, for
@@ -4955,10 +5000,34 @@
 			number avoids disturbing real-time workloads,
 			but lengthens grace periods.
 
+	rcupdate.rcu_task_stall_info= [KNL]
+			Set initial timeout in jiffies for RCU task stall
+			informational messages, which give some indication
+			of the problem for those not patient enough to
+			wait for ten minutes.  Informational messages are
+			only printed prior to the stall-warning message
+			for a given grace period. Disable with a value
+			less than or equal to zero.  Defaults to ten
+			seconds.  A change in value does not take effect
+			until the beginning of the next grace period.
+
+	rcupdate.rcu_task_stall_info_mult= [KNL]
+			Multiplier for time interval between successive
+			RCU task stall informational messages for a given
+			RCU tasks grace period.  This value is clamped
+			to one through ten, inclusive.	It defaults to
+			the value three, so that the first informational
+			message is printed 10 seconds into the grace
+			period, the second at 40 seconds, the third at
+			160 seconds, and then the stall warning at 600
+			seconds would prevent a fourth at 640 seconds.
+
 	rcupdate.rcu_task_stall_timeout= [KNL]
-			Set timeout in jiffies for RCU task stall warning
-			messages.  Disable with a value less than or equal
-			to zero.
+			Set timeout in jiffies for RCU task stall
+			warning messages.  Disable with a value less
+			than or equal to zero.	Defaults to ten minutes.
+			A change in value does not take effect until
+			the beginning of the next grace period.
 
 	rcupdate.rcu_self_test= [KNL]
 			Run the RCU early boot self tests
@@ -5077,15 +5146,6 @@
 			Useful for devices that are detected asynchronously
 			(e.g. USB and MMC devices).
 
-	hibernate=	[HIBERNATION]
-		noresume	Don't check if there's a hibernation image
-				present during boot.
-		nocompress	Don't compress/decompress hibernation images.
-		no		Disable hibernation and resume.
-		protect_image	Turn on image protection during restoration
-				(that will set all pages holding image data
-				during restoration read-only).
-
 	retain_initrd	[RAM] Keep initrd memory after extraction
 
 	rfkill.default_state=
@@ -5308,6 +5368,8 @@
 
 	serialnumber	[BUGS=X86-32]
 
+	sev=option[,option...] [X86-64] See Documentation/x86/x86_64/boot-options.rst
+
 	shapers=	[NET]
 			Maximal number of shapers.
 
@@ -5377,6 +5439,17 @@
 	smart2=		[HW]
 			Format: <io1>[,<io2>[,...,<io8>]]
 
+	smp.csd_lock_timeout= [KNL]
+			Specify the period of time in milliseconds
+			that smp_call_function() and friends will wait
+			for a CPU to release the CSD lock.  This is
+			useful when diagnosing bugs involving CPUs
+			disabling interrupts for extended periods
+			of time.  Defaults to 5,000 milliseconds, and
+			setting a value of zero disables this feature.
+			This feature may be more efficiently disabled
+			using the csdlock_debug- kernel parameter.
+
 	smsc-ircc2.nopnp	[HW] Don't use PNP to discover SMC devices
 	smsc-ircc2.ircc_cfg=	[HW] Device configuration I/O port
 	smsc-ircc2.ircc_sir=	[HW] SIR base I/O port
@@ -5388,7 +5461,7 @@
 				1: Fast pin select (default)
 				2: ATC IRMode
 
-	smt		[KNL,S390] Set the maximum number of threads (logical
+	smt=		[KNL,S390] Set the maximum number of threads (logical
 			CPUs) to use per physical CPU on systems capable of
 			symmetric multithreading (SMT). Will be capped to the
 			actual hardware limit.
@@ -5608,6 +5681,30 @@
 			off:    Disable mitigation and remove
 				performance impact to RDRAND and RDSEED
 
+	srcutree.big_cpu_lim [KNL]
+			Specifies the number of CPUs constituting a
+			large system, such that srcu_struct structures
+			should immediately allocate an srcu_node array.
+			This kernel-boot parameter defaults to 128,
+			but takes effect only when the low-order four
+			bits of srcutree.convert_to_big is equal to 3
+			(decide at boot).
+
+	srcutree.convert_to_big [KNL]
+			Specifies under what conditions an SRCU tree
+			srcu_struct structure will be converted to big
+			form, that is, with an rcu_node tree:
+
+				   0:  Never.
+				   1:  At init_srcu_struct() time.
+				   2:  When rcutorture decides to.
+				   3:  Decide at boot time (default).
+				0x1X:  Above plus if high contention.
+
+			Either way, the srcu_node tree will be sized based
+			on the actual runtime number of CPUs (nr_cpu_ids)
+			instead of the compile-time CONFIG_NR_CPUS.
+
 	srcutree.counter_wrap_check [KNL]
 			Specifies how frequently to check for
 			grace-period sequence counter wrap for the
@@ -5625,6 +5722,14 @@
 			expediting.  Set to zero to disable automatic
 			expediting.
 
+	srcutree.small_contention_lim [KNL]
+			Specifies the number of update-side contention
+			events per jiffy will be tolerated before
+			initiating a conversion of an srcu_struct
+			structure to big form.	Note that the value of
+			srcutree.convert_to_big must have the 0x10 bit
+			set for contention-based conversions to occur.
+
 	ssbd=		[ARM64,HW]
 			Speculative Store Bypass Disable control
 
@@ -5743,8 +5848,9 @@
 			This parameter controls use of the Protected
 			Execution Facility on pSeries.
 
-	swapaccount=[0|1]
-			[KNL] Enable accounting of swap in memory resource
+	swapaccount=	[KNL]
+			Format: [0|1]
+			Enable accounting of swap in memory resource
 			controller if no parameter or 1 is given or disable
 			it if 0 is given (See Documentation/admin-guide/cgroup-v1/memory.rst)
 
@@ -5790,7 +5896,8 @@
 
 	tdfx=		[HW,DRM]
 
-	test_suspend=	[SUSPEND][,N]
+	test_suspend=	[SUSPEND]
+			Format: { "mem" | "standby" | "freeze" }[,N]
 			Specify "mem" (for Suspend-to-RAM) or "standby" (for
 			standby suspend) or "freeze" (for suspend type freeze)
 			as the system sleep state during system startup with
@@ -5874,32 +5981,7 @@
 			This will guarantee that all the other pcrs
 			are saved.
 
-	trace_buf_size=nn[KMG]
-			[FTRACE] will set tracing buffer size on each cpu.
-
-	trace_event=[event-list]
-			[FTRACE] Set and start specified trace events in order
-			to facilitate early boot debugging. The event-list is a
-			comma-separated list of trace events to enable. See
-			also Documentation/trace/events.rst
-
-	trace_options=[option-list]
-			[FTRACE] Enable or disable tracer options at boot.
-			The option-list is a comma delimited list of options
-			that can be enabled or disabled just as if you were
-			to echo the option name into
-
-			    /sys/kernel/debug/tracing/trace_options
-
-			For example, to enable stacktrace option (to dump the
-			stack trace of each event), add to the command line:
-
-			      trace_options=stacktrace
-
-			See also Documentation/trace/ftrace.rst "trace options"
-			section.
-
-	tp_printk[FTRACE]
+	tp_printk	[FTRACE]
 			Have the tracepoints sent to printk as well as the
 			tracing ring buffer. This is useful for early boot up
 			where the system hangs or reboots and does not give the
@@ -5921,7 +6003,7 @@
 			frequency tracepoints such as irq or sched, can cause
 			the system to live lock.
 
-	tp_printk_stop_on_boot[FTRACE]
+	tp_printk_stop_on_boot [FTRACE]
 			When tp_printk (above) is set, it can cause a lot of noise
 			on the console. It may be useful to only include the
 			printing of events during boot up, as user space may
@@ -5930,6 +6012,53 @@
 			This command line option will stop the printing of events
 			to console at the late_initcall_sync() time frame.
 
+	trace_buf_size=nn[KMG]
+			[FTRACE] will set tracing buffer size on each cpu.
+
+	trace_clock=	[FTRACE] Set the clock used for tracing events
+			at boot up.
+			local - Use the per CPU time stamp counter
+				(converted into nanoseconds). Fast, but
+				depending on the architecture, may not be
+				in sync between CPUs.
+			global - Event time stamps are synchronize across
+				CPUs. May be slower than the local clock,
+				but better for some race conditions.
+			counter - Simple counting of events (1, 2, ..)
+				note, some counts may be skipped due to the
+				infrastructure grabbing the clock more than
+				once per event.
+			uptime - Use jiffies as the time stamp.
+			perf - Use the same clock that perf uses.
+			mono - Use ktime_get_mono_fast_ns() for time stamps.
+			mono_raw - Use ktime_get_raw_fast_ns() for time
+				stamps.
+			boot - Use ktime_get_boot_fast_ns() for time stamps.
+			Architectures may add more clocks. See
+			Documentation/trace/ftrace.rst for more details.
+
+	trace_event=[event-list]
+			[FTRACE] Set and start specified trace events in order
+			to facilitate early boot debugging. The event-list is a
+			comma-separated list of trace events to enable. See
+			also Documentation/trace/events.rst
+
+	trace_options=[option-list]
+			[FTRACE] Enable or disable tracer options at boot.
+			The option-list is a comma delimited list of options
+			that can be enabled or disabled just as if you were
+			to echo the option name into
+
+			    /sys/kernel/debug/tracing/trace_options
+
+			For example, to enable stacktrace option (to dump the
+			stack trace of each event), add to the command line:
+
+			      trace_options=stacktrace
+
+			See also Documentation/trace/ftrace.rst "trace options"
+			section.
+
 	traceoff_on_warning
 			[FTRACE] enable this option to disable tracing when a
 			warning is hit. This turns off "tracing_on". Tracing can
@@ -5958,11 +6087,22 @@
 			sources:
 			- "tpm"
 			- "tee"
+			- "caam"
 			If not specified then it defaults to iterating through
 			the trust source list starting with TPM and assigns the
 			first trust source as a backend which is initialized
 			successfully during iteration.
 
+	trusted.rng=	[KEYS]
+			Format: <string>
+			The RNG used to generate key material for trusted keys.
+			Can be one of:
+			- "kernel"
+			- the same value as trusted.source: "tpm" or "tee"
+			- "default"
+			If not specified, "default" is used. In this case,
+			the RNG's choice is left to each individual trust source.
+
 	tsc=		Disable clocksource stability checks for TSC.
 			Format: <string>
 			[x86] reliable: mark tsc clocksource as reliable, this
@@ -6270,7 +6410,7 @@
 					HIGHMEM regardless of setting
 					of CONFIG_HIGHPTE.
 
-	vdso=		[X86,SH]
+	vdso=		[X86,SH,SPARC]
 			On X86_32, this is an alias for vdso32=.  Otherwise:
 
 			vdso=1: enable VDSO (the default)
@@ -6296,11 +6436,12 @@
 	video=		[FB] Frame buffer configuration
 			See Documentation/fb/modedb.rst.
 
-	video.brightness_switch_enabled= [0,1]
+	video.brightness_switch_enabled= [ACPI]
+			Format: [0|1]
 			If set to 1, on receiving an ACPI notify event
 			generated by hotkey, video driver will adjust brightness
 			level and then send out the event to user space through
-			the allocated input device; If set to 0, video driver
+			the allocated input device. If set to 0, video driver
 			will only send out the event without touching backlight
 			brightness level.
 			default: 1
diff --git a/Documentation/admin-guide/media/vimc.dot b/Documentation/admin-guide/media/vimc.dot
index 57863a13fa39..8e829c164626 100644
--- a/Documentation/admin-guide/media/vimc.dot
+++ b/Documentation/admin-guide/media/vimc.dot
@@ -9,14 +9,14 @@ digraph board {
 	n00000003:port0 -> n00000008:port0 [style=bold]
 	n00000003:port0 -> n0000000f [style=bold]
 	n00000005 [label="{{<port0> 0} | Debayer A\n/dev/v4l-subdev2 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
-	n00000005:port1 -> n00000017:port0
+	n00000005:port1 -> n00000015:port0
 	n00000008 [label="{{<port0> 0} | Debayer B\n/dev/v4l-subdev3 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
-	n00000008:port1 -> n00000017:port0 [style=dashed]
+	n00000008:port1 -> n00000015:port0 [style=dashed]
 	n0000000b [label="Raw Capture 0\n/dev/video0", shape=box, style=filled, fillcolor=yellow]
 	n0000000f [label="Raw Capture 1\n/dev/video1", shape=box, style=filled, fillcolor=yellow]
-	n00000013 [label="RGB/YUV Input\n/dev/video2", shape=box, style=filled, fillcolor=yellow]
-	n00000013 -> n00000017:port0 [style=dashed]
-	n00000017 [label="{{<port0> 0} | Scaler\n/dev/v4l-subdev4 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
-	n00000017:port1 -> n0000001a [style=bold]
-	n0000001a [label="RGB/YUV Capture\n/dev/video3", shape=box, style=filled, fillcolor=yellow]
+	n00000013 [label="{{} | RGB/YUV Input\n/dev/v4l-subdev4 | {<port0> 0}}", shape=Mrecord, style=filled, fillcolor=green]
+	n00000013:port0 -> n00000015:port0 [style=dashed]
+	n00000015 [label="{{<port0> 0} | Scaler\n/dev/v4l-subdev5 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
+	n00000015:port1 -> n00000018 [style=bold]
+	n00000018 [label="RGB/YUV Capture\n/dev/video2", shape=box, style=filled, fillcolor=yellow]
 }
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index 1144ea3229a3..ddccd1077462 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -783,6 +783,13 @@ is useful to define the root cause of RCU stalls using a vmcore.
 1 panic() after printing RCU stall messages.
 = ============================================================
 
+max_rcu_stall_to_panic
+======================
+
+When ``panic_on_rcu_stall`` is set to 1, this value determines the
+number of times that RCU can stall before panic() is called.
+
+When ``panic_on_rcu_stall`` is set to 0, this value is has no effect.
 
 perf_cpu_time_max_percent
 =========================
@@ -994,6 +1001,9 @@ This is a directory, with the following entries:
 * ``boot_id``: a UUID generated the first time this is retrieved, and
   unvarying after that;
 
+* ``uuid``: a UUID generated every time this is retrieved (this can
+  thus be used to generate UUIDs at will);
+
 * ``entropy_avail``: the pool's entropy count, in bits;
 
 * ``poolsize``: the entropy pool size, in bits;
@@ -1001,10 +1011,7 @@ This is a directory, with the following entries:
 * ``urandom_min_reseed_secs``: obsolete (used to determine the minimum
   number of seconds between urandom pool reseeding). This file is
   writable for compatibility purposes, but writing to it has no effect
-  on any RNG behavior.
-
-* ``uuid``: a UUID generated every time this is retrieved (this can
-  thus be used to generate UUIDs at will);
+  on any RNG behavior;
 
 * ``write_wakeup_threshold``: when the entropy count drops below this
   (as a number of bits), processes waiting to write to ``/dev/random``
diff --git a/Documentation/arm64/booting.rst b/Documentation/arm64/booting.rst
index 29884b261aa9..8aefa1001ae5 100644
--- a/Documentation/arm64/booting.rst
+++ b/Documentation/arm64/booting.rst
@@ -350,6 +350,16 @@ Before jumping into the kernel, the following conditions must be met:
 
     - SMCR_EL2.FA64 (bit 31) must be initialised to 0b1.
 
+  For CPUs with the Memory Tagging Extension feature (FEAT_MTE2):
+
+  - If EL3 is present:
+
+    - SCR_EL3.ATA (bit 26) must be initialised to 0b1.
+
+  - If the kernel is entered at EL1 and EL2 is present:
+
+    - HCR_EL2.ATA (bit 56) must be initialised to 0b1.
+
 The requirements described above for CPU mode, caches, MMUs, architected
 timers, coherency and system registers apply to all CPUs.  All CPUs must
 enter the kernel in the same exception level.  Where the values documented
diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst
index a8f30963e550..f8d818eaaff5 100644
--- a/Documentation/arm64/elf_hwcaps.rst
+++ b/Documentation/arm64/elf_hwcaps.rst
@@ -264,6 +264,39 @@ HWCAP2_MTE3
     Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0011, as described
     by Documentation/arm64/memory-tagging-extension.rst.
 
+HWCAP2_SME
+
+    Functionality implied by ID_AA64PFR1_EL1.SME == 0b0001, as described
+    by Documentation/arm64/sme.rst.
+
+HWCAP2_SME_I16I64
+
+    Functionality implied by ID_AA64SMFR0_EL1.I16I64 == 0b1111.
+
+HWCAP2_SME_F64F64
+
+    Functionality implied by ID_AA64SMFR0_EL1.F64F64 == 0b1.
+
+HWCAP2_SME_I8I32
+
+    Functionality implied by ID_AA64SMFR0_EL1.I8I32 == 0b1111.
+
+HWCAP2_SME_F16F32
+
+    Functionality implied by ID_AA64SMFR0_EL1.F16F32 == 0b1.
+
+HWCAP2_SME_B16F32
+
+    Functionality implied by ID_AA64SMFR0_EL1.B16F32 == 0b1.
+
+HWCAP2_SME_F32F32
+
+    Functionality implied by ID_AA64SMFR0_EL1.F32F32 == 0b1.
+
+HWCAP2_SME_FA64
+
+    Functionality implied by ID_AA64SMFR0_EL1.FA64 == 0b1.
+
 4. Unused AT_HWCAP bits
 -----------------------
 
diff --git a/Documentation/arm64/index.rst b/Documentation/arm64/index.rst
index 4f840bac083e..ae21f8118830 100644
--- a/Documentation/arm64/index.rst
+++ b/Documentation/arm64/index.rst
@@ -21,6 +21,7 @@ ARM64 Architecture
     perf
     pointer-authentication
     silicon-errata
+    sme
     sve
     tagged-address-abi
     tagged-pointers
diff --git a/Documentation/arm64/sme.rst b/Documentation/arm64/sme.rst
new file mode 100644
index 000000000000..8ba677b87e90
--- /dev/null
+++ b/Documentation/arm64/sme.rst
@@ -0,0 +1,428 @@
+===================================================
+Scalable Matrix Extension support for AArch64 Linux
+===================================================
+
+This document outlines briefly the interface provided to userspace by Linux in
+order to support use of the ARM Scalable Matrix Extension (SME).
+
+This is an outline of the most important features and issues only and not
+intended to be exhaustive.  It should be read in conjunction with the SVE
+documentation in sve.rst which provides details on the Streaming SVE mode
+included in SME.
+
+This document does not aim to describe the SME architecture or programmer's
+model.  To aid understanding, a minimal description of relevant programmer's
+model features for SME is included in Appendix A.
+
+
+1.  General
+-----------
+
+* PSTATE.SM, PSTATE.ZA, the streaming mode vector length, the ZA
+  register state and TPIDR2_EL0 are tracked per thread.
+
+* The presence of SME is reported to userspace via HWCAP2_SME in the aux vector
+  AT_HWCAP2 entry.  Presence of this flag implies the presence of the SME
+  instructions and registers, and the Linux-specific system interfaces
+  described in this document.  SME is reported in /proc/cpuinfo as "sme".
+
+* Support for the execution of SME instructions in userspace can also be
+  detected by reading the CPU ID register ID_AA64PFR1_EL1 using an MRS
+  instruction, and checking that the value of the SME field is nonzero. [3]
+
+  It does not guarantee the presence of the system interfaces described in the
+  following sections: software that needs to verify that those interfaces are
+  present must check for HWCAP2_SME instead.
+
+* There are a number of optional SME features, presence of these is reported
+  through AT_HWCAP2 through:
+
+	HWCAP2_SME_I16I64
+	HWCAP2_SME_F64F64
+	HWCAP2_SME_I8I32
+	HWCAP2_SME_F16F32
+	HWCAP2_SME_B16F32
+	HWCAP2_SME_F32F32
+	HWCAP2_SME_FA64
+
+  This list may be extended over time as the SME architecture evolves.
+
+  These extensions are also reported via the CPU ID register ID_AA64SMFR0_EL1,
+  which userspace can read using an MRS instruction.  See elf_hwcaps.txt and
+  cpu-feature-registers.txt for details.
+
+* Debuggers should restrict themselves to interacting with the target via the
+  NT_ARM_SVE, NT_ARM_SSVE and NT_ARM_ZA regsets.  The recommended way
+  of detecting support for these regsets is to connect to a target process
+  first and then attempt a
+
+	ptrace(PTRACE_GETREGSET, pid, NT_ARM_<regset>, &iov).
+
+* Whenever ZA register values are exchanged in memory between userspace and
+  the kernel, the register value is encoded in memory as a series of horizontal
+  vectors from 0 to VL/8-1 stored in the same endianness invariant format as is
+  used for SVE vectors.
+
+* On thread creation TPIDR2_EL0 is preserved unless CLONE_SETTLS is specified,
+  in which case it is set to 0.
+
+2.  Vector lengths
+------------------
+
+SME defines a second vector length similar to the SVE vector length which is
+controls the size of the streaming mode SVE vectors and the ZA matrix array.
+The ZA matrix is square with each side having as many bytes as a streaming
+mode SVE vector.
+
+
+3.  Sharing of streaming and non-streaming mode SVE state
+---------------------------------------------------------
+
+It is implementation defined which if any parts of the SVE state are shared
+between streaming and non-streaming modes.  When switching between modes
+via software interfaces such as ptrace if no register content is provided as
+part of switching no state will be assumed to be shared and everything will
+be zeroed.
+
+
+4.  System call behaviour
+-------------------------
+
+* On syscall PSTATE.ZA is preserved, if PSTATE.ZA==1 then the contents of the
+  ZA matrix are preserved.
+
+* On syscall PSTATE.SM will be cleared and the SVE registers will be handled
+  as per the standard SVE ABI.
+
+* Neither the SVE registers nor ZA are used to pass arguments to or receive
+  results from any syscall.
+
+* On process creation (eg, clone()) the newly created process will have
+  PSTATE.SM cleared.
+
+* All other SME state of a thread, including the currently configured vector
+  length, the state of the PR_SME_VL_INHERIT flag, and the deferred vector
+  length (if any), is preserved across all syscalls, subject to the specific
+  exceptions for execve() described in section 6.
+
+
+5.  Signal handling
+-------------------
+
+* Signal handlers are invoked with streaming mode and ZA disabled.
+
+* A new signal frame record za_context encodes the ZA register contents on
+  signal delivery. [1]
+
+* The signal frame record for ZA always contains basic metadata, in particular
+  the thread's vector length (in za_context.vl).
+
+* The ZA matrix may or may not be included in the record, depending on
+  the value of PSTATE.ZA.  The registers are present if and only if:
+  za_context.head.size >= ZA_SIG_CONTEXT_SIZE(sve_vq_from_vl(za_context.vl))
+  in which case PSTATE.ZA == 1.
+
+* If matrix data is present, the remainder of the record has a vl-dependent
+  size and layout.  Macros ZA_SIG_* are defined [1] to facilitate access to
+  them.
+
+* The matrix is stored as a series of horizontal vectors in the same format as
+  is used for SVE vectors.
+
+* If the ZA context is too big to fit in sigcontext.__reserved[], then extra
+  space is allocated on the stack, an extra_context record is written in
+  __reserved[] referencing this space.  za_context is then written in the
+  extra space.  Refer to [1] for further details about this mechanism.
+
+
+5.  Signal return
+-----------------
+
+When returning from a signal handler:
+
+* If there is no za_context record in the signal frame, or if the record is
+  present but contains no register data as described in the previous section,
+  then ZA is disabled.
+
+* If za_context is present in the signal frame and contains matrix data then
+  PSTATE.ZA is set to 1 and ZA is populated with the specified data.
+
+* The vector length cannot be changed via signal return.  If za_context.vl in
+  the signal frame does not match the current vector length, the signal return
+  attempt is treated as illegal, resulting in a forced SIGSEGV.
+
+
+6.  prctl extensions
+--------------------
+
+Some new prctl() calls are added to allow programs to manage the SME vector
+length:
+
+prctl(PR_SME_SET_VL, unsigned long arg)
+
+    Sets the vector length of the calling thread and related flags, where
+    arg == vl | flags.  Other threads of the calling process are unaffected.
+
+    vl is the desired vector length, where sve_vl_valid(vl) must be true.
+
+    flags:
+
+	PR_SME_VL_INHERIT
+
+	    Inherit the current vector length across execve().  Otherwise, the
+	    vector length is reset to the system default at execve().  (See
+	    Section 9.)
+
+	PR_SME_SET_VL_ONEXEC
+
+	    Defer the requested vector length change until the next execve()
+	    performed by this thread.
+
+	    The effect is equivalent to implicit execution of the following
+	    call immediately after the next execve() (if any) by the thread:
+
+		prctl(PR_SME_SET_VL, arg & ~PR_SME_SET_VL_ONEXEC)
+
+	    This allows launching of a new program with a different vector
+	    length, while avoiding runtime side effects in the caller.
+
+	    Without PR_SME_SET_VL_ONEXEC, the requested change takes effect
+	    immediately.
+
+
+    Return value: a nonnegative on success, or a negative value on error:
+	EINVAL: SME not supported, invalid vector length requested, or
+	    invalid flags.
+
+
+    On success:
+
+    * Either the calling thread's vector length or the deferred vector length
+      to be applied at the next execve() by the thread (dependent on whether
+      PR_SME_SET_VL_ONEXEC is present in arg), is set to the largest value
+      supported by the system that is less than or equal to vl.  If vl ==
+      SVE_VL_MAX, the value set will be the largest value supported by the
+      system.
+
+    * Any previously outstanding deferred vector length change in the calling
+      thread is cancelled.
+
+    * The returned value describes the resulting configuration, encoded as for
+      PR_SME_GET_VL.  The vector length reported in this value is the new
+      current vector length for this thread if PR_SME_SET_VL_ONEXEC was not
+      present in arg; otherwise, the reported vector length is the deferred
+      vector length that will be applied at the next execve() by the calling
+      thread.
+
+    * Changing the vector length causes all of ZA, P0..P15, FFR and all bits of
+      Z0..Z31 except for Z0 bits [127:0] .. Z31 bits [127:0] to become
+      unspecified, including both streaming and non-streaming SVE state.
+      Calling PR_SME_SET_VL with vl equal to the thread's current vector
+      length, or calling PR_SME_SET_VL with the PR_SVE_SET_VL_ONEXEC flag,
+      does not constitute a change to the vector length for this purpose.
+
+    * Changing the vector length causes PSTATE.ZA and PSTATE.SM to be cleared.
+      Calling PR_SME_SET_VL with vl equal to the thread's current vector
+      length, or calling PR_SME_SET_VL with the PR_SVE_SET_VL_ONEXEC flag,
+      does not constitute a change to the vector length for this purpose.
+
+
+prctl(PR_SME_GET_VL)
+
+    Gets the vector length of the calling thread.
+
+    The following flag may be OR-ed into the result:
+
+	PR_SME_VL_INHERIT
+
+	    Vector length will be inherited across execve().
+
+    There is no way to determine whether there is an outstanding deferred
+    vector length change (which would only normally be the case between a
+    fork() or vfork() and the corresponding execve() in typical use).
+
+    To extract the vector length from the result, bitwise and it with
+    PR_SME_VL_LEN_MASK.
+
+    Return value: a nonnegative value on success, or a negative value on error:
+	EINVAL: SME not supported.
+
+
+7.  ptrace extensions
+---------------------
+
+* A new regset NT_ARM_SSVE is defined for access to streaming mode SVE
+  state via PTRACE_GETREGSET and  PTRACE_SETREGSET, this is documented in
+  sve.rst.
+
+* A new regset NT_ARM_ZA is defined for ZA state for access to ZA state via
+  PTRACE_GETREGSET and PTRACE_SETREGSET.
+
+  Refer to [2] for definitions.
+
+The regset data starts with struct user_za_header, containing:
+
+    size
+
+	Size of the complete regset, in bytes.
+	This depends on vl and possibly on other things in the future.
+
+	If a call to PTRACE_GETREGSET requests less data than the value of
+	size, the caller can allocate a larger buffer and retry in order to
+	read the complete regset.
+
+    max_size
+
+	Maximum size in bytes that the regset can grow to for the target
+	thread.  The regset won't grow bigger than this even if the target
+	thread changes its vector length etc.
+
+    vl
+
+	Target thread's current streaming vector length, in bytes.
+
+    max_vl
+
+	Maximum possible streaming vector length for the target thread.
+
+    flags
+
+	Zero or more of the following flags, which have the same
+	meaning and behaviour as the corresponding PR_SET_VL_* flags:
+
+	    SME_PT_VL_INHERIT
+
+	    SME_PT_VL_ONEXEC (SETREGSET only).
+
+* The effects of changing the vector length and/or flags are equivalent to
+  those documented for PR_SME_SET_VL.
+
+  The caller must make a further GETREGSET call if it needs to know what VL is
+  actually set by SETREGSET, unless is it known in advance that the requested
+  VL is supported.
+
+* The size and layout of the payload depends on the header fields.  The
+  SME_PT_ZA_*() macros are provided to facilitate access to the data.
+
+* In either case, for SETREGSET it is permissible to omit the payload, in which
+  case the vector length and flags are changed and PSTATE.ZA is set to 0
+  (along with any consequences of those changes).  If a payload is provided
+  then PSTATE.ZA will be set to 1.
+
+* For SETREGSET, if the requested VL is not supported, the effect will be the
+  same as if the payload were omitted, except that an EIO error is reported.
+  No attempt is made to translate the payload data to the correct layout
+  for the vector length actually set.  It is up to the caller to translate the
+  payload layout for the actual VL and retry.
+
+* The effect of writing a partial, incomplete payload is unspecified.
+
+
+8.  ELF coredump extensions
+---------------------------
+
+* NT_ARM_SSVE notes will be added to each coredump for
+  each thread of the dumped process.  The contents will be equivalent to the
+  data that would have been read if a PTRACE_GETREGSET of the corresponding
+  type were executed for each thread when the coredump was generated.
+
+* A NT_ARM_ZA note will be added to each coredump for each thread of the
+  dumped process.  The contents will be equivalent to the data that would have
+  been read if a PTRACE_GETREGSET of NT_ARM_ZA were executed for each thread
+  when the coredump was generated.
+
+
+9.  System runtime configuration
+--------------------------------
+
+* To mitigate the ABI impact of expansion of the signal frame, a policy
+  mechanism is provided for administrators, distro maintainers and developers
+  to set the default vector length for userspace processes:
+
+/proc/sys/abi/sme_default_vector_length
+
+    Writing the text representation of an integer to this file sets the system
+    default vector length to the specified value, unless the value is greater
+    than the maximum vector length supported by the system in which case the
+    default vector length is set to that maximum.
+
+    The result can be determined by reopening the file and reading its
+    contents.
+
+    At boot, the default vector length is initially set to 32 or the maximum
+    supported vector length, whichever is smaller and supported.  This
+    determines the initial vector length of the init process (PID 1).
+
+    Reading this file returns the current system default vector length.
+
+* At every execve() call, the new vector length of the new process is set to
+  the system default vector length, unless
+
+    * PR_SME_VL_INHERIT (or equivalently SME_PT_VL_INHERIT) is set for the
+      calling thread, or
+
+    * a deferred vector length change is pending, established via the
+      PR_SME_SET_VL_ONEXEC flag (or SME_PT_VL_ONEXEC).
+
+* Modifying the system default vector length does not affect the vector length
+  of any existing process or thread that does not make an execve() call.
+
+
+Appendix A.  SME programmer's model (informative)
+=================================================
+
+This section provides a minimal description of the additions made by SVE to the
+ARMv8-A programmer's model that are relevant to this document.
+
+Note: This section is for information only and not intended to be complete or
+to replace any architectural specification.
+
+A.1.  Registers
+---------------
+
+In A64 state, SME adds the following:
+
+* A new mode, streaming mode, in which a subset of the normal FPSIMD and SVE
+  features are available.  When supported EL0 software may enter and leave
+  streaming mode at any time.
+
+  For best system performance it is strongly encouraged for software to enable
+  streaming mode only when it is actively being used.
+
+* A new vector length controlling the size of ZA and the Z registers when in
+  streaming mode, separately to the vector length used for SVE when not in
+  streaming mode.  There is no requirement that either the currently selected
+  vector length or the set of vector lengths supported for the two modes in
+  a given system have any relationship.  The streaming mode vector length
+  is referred to as SVL.
+
+* A new ZA matrix register.  This is a square matrix of SVLxSVL bits.  Most
+  operations on ZA require that streaming mode be enabled but ZA can be
+  enabled without streaming mode in order to load, save and retain data.
+
+  For best system performance it is strongly encouraged for software to enable
+  ZA only when it is actively being used.
+
+* Two new 1 bit fields in PSTATE which may be controlled via the SMSTART and
+  SMSTOP instructions or by access to the SVCR system register:
+
+  * PSTATE.ZA, if this is 1 then the ZA matrix is accessible and has valid
+    data while if it is 0 then ZA can not be accessed.  When PSTATE.ZA is
+    changed from 0 to 1 all bits in ZA are cleared.
+
+  * PSTATE.SM, if this is 1 then the PE is in streaming mode.  When the value
+    of PSTATE.SM is changed then it is implementation defined if the subset
+    of the floating point register bits valid in both modes may be retained.
+    Any other bits will be cleared.
+
+
+References
+==========
+
+[1] arch/arm64/include/uapi/asm/sigcontext.h
+    AArch64 Linux signal ABI definitions
+
+[2] arch/arm64/include/uapi/asm/ptrace.h
+    AArch64 Linux ptrace ABI definitions
+
+[3] Documentation/arm64/cpu-feature-registers.rst
diff --git a/Documentation/arm64/sve.rst b/Documentation/arm64/sve.rst
index 9d9a4de5bc34..93c2c2990584 100644
--- a/Documentation/arm64/sve.rst
+++ b/Documentation/arm64/sve.rst
@@ -7,7 +7,9 @@ Author: Dave Martin <Dave.Martin@arm.com>
 Date:   4 August 2017
 
 This document outlines briefly the interface provided to userspace by Linux in
-order to support use of the ARM Scalable Vector Extension (SVE).
+order to support use of the ARM Scalable Vector Extension (SVE), including
+interactions with Streaming SVE mode added by the Scalable Matrix Extension
+(SME).
 
 This is an outline of the most important features and issues only and not
 intended to be exhaustive.
@@ -23,6 +25,10 @@ model features for SVE is included in Appendix A.
 * SVE registers Z0..Z31, P0..P15 and FFR and the current vector length VL, are
   tracked per-thread.
 
+* In streaming mode FFR is not accessible unless HWCAP2_SME_FA64 is present
+  in the system, when it is not supported and these interfaces are used to
+  access streaming mode FFR is read and written as zero.
+
 * The presence of SVE is reported to userspace via HWCAP_SVE in the aux vector
   AT_HWCAP entry.  Presence of this flag implies the presence of the SVE
   instructions and registers, and the Linux-specific system interfaces
@@ -53,10 +59,19 @@ model features for SVE is included in Appendix A.
   which userspace can read using an MRS instruction.  See elf_hwcaps.txt and
   cpu-feature-registers.txt for details.
 
+* On hardware that supports the SME extensions, HWCAP2_SME will also be
+  reported in the AT_HWCAP2 aux vector entry.  Among other things SME adds
+  streaming mode which provides a subset of the SVE feature set using a
+  separate SME vector length and the same Z/V registers.  See sme.rst
+  for more details.
+
 * Debuggers should restrict themselves to interacting with the target via the
   NT_ARM_SVE regset.  The recommended way of detecting support for this regset
   is to connect to a target process first and then attempt a
-  ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov).
+  ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov).  Note that when SME is
+  present and streaming SVE mode is in use the FPSIMD subset of registers
+  will be read via NT_ARM_SVE and NT_ARM_SVE writes will exit streaming mode
+  in the target.
 
 * Whenever SVE scalable register values (Zn, Pn, FFR) are exchanged in memory
   between userspace and the kernel, the register value is encoded in memory in
@@ -126,6 +141,11 @@ the SVE instruction set architecture.
   are only present in fpsimd_context.  For convenience, the content of V0..V31
   is duplicated between sve_context and fpsimd_context.
 
+* The record contains a flag field which includes a flag SVE_SIG_FLAG_SM which
+  if set indicates that the thread is in streaming mode and the vector length
+  and register data (if present) describe the streaming SVE data and vector
+  length.
+
 * The signal frame record for SVE always contains basic metadata, in particular
   the thread's vector length (in sve_context.vl).
 
@@ -170,6 +190,11 @@ When returning from a signal handler:
   the signal frame does not match the current vector length, the signal return
   attempt is treated as illegal, resulting in a forced SIGSEGV.
 
+* It is permitted to enter or leave streaming mode by setting or clearing
+  the SVE_SIG_FLAG_SM flag but applications should take care to ensure that
+  when doing so sve_context.vl and any register data are appropriate for the
+  vector length in the new mode.
+
 
 6.  prctl extensions
 --------------------
@@ -265,8 +290,14 @@ prctl(PR_SVE_GET_VL)
 7.  ptrace extensions
 ---------------------
 
-* A new regset NT_ARM_SVE is defined for use with PTRACE_GETREGSET and
-  PTRACE_SETREGSET.
+* New regsets NT_ARM_SVE and NT_ARM_SSVE are defined for use with
+  PTRACE_GETREGSET and PTRACE_SETREGSET. NT_ARM_SSVE describes the
+  streaming mode SVE registers and NT_ARM_SVE describes the
+  non-streaming mode SVE registers.
+
+  In this description a register set is referred to as being "live" when
+  the target is in the appropriate streaming or non-streaming mode and is
+  using data beyond the subset shared with the FPSIMD Vn registers.
 
   Refer to [2] for definitions.
 
@@ -297,7 +328,7 @@ The regset data starts with struct user_sve_header, containing:
 
     flags
 
-	either
+	at most one of
 
 	    SVE_PT_REGS_FPSIMD
 
@@ -331,6 +362,10 @@ The regset data starts with struct user_sve_header, containing:
 
 	    SVE_PT_VL_ONEXEC (SETREGSET only).
 
+	If neither FPSIMD nor SVE flags are provided then no register
+	payload is available, this is only possible when SME is implemented.
+
+
 * The effects of changing the vector length and/or flags are equivalent to
   those documented for PR_SVE_SET_VL.
 
@@ -346,6 +381,13 @@ The regset data starts with struct user_sve_header, containing:
   case only the vector length and flags are changed (along with any
   consequences of those changes).
 
+* In systems supporting SME when in streaming mode a GETREGSET for
+  NT_REG_SVE will return only the user_sve_header with no register data,
+  similarly a GETREGSET for NT_REG_SSVE will not return any register data
+  when not in streaming mode.
+
+* A GETREGSET for NT_ARM_SSVE will never return SVE_PT_REGS_FPSIMD.
+
 * For SETREGSET, if an SVE_PT_REGS_SVE payload is present and the
   requested VL is not supported, the effect will be the same as if the
   payload were omitted, except that an EIO error is reported.  No
@@ -355,17 +397,25 @@ The regset data starts with struct user_sve_header, containing:
   unspecified.  It is up to the caller to translate the payload layout
   for the actual VL and retry.
 
+* Where SME is implemented it is not possible to GETREGSET the register
+  state for normal SVE when in streaming mode, nor the streaming mode
+  register state when in normal mode, regardless of the implementation defined
+  behaviour of the hardware for sharing data between the two modes.
+
+* Any SETREGSET of NT_ARM_SVE will exit streaming mode if the target was in
+  streaming mode and any SETREGSET of NT_ARM_SSVE will enter streaming mode
+  if the target was not in streaming mode.
+
 * The effect of writing a partial, incomplete payload is unspecified.
 
 
 8.  ELF coredump extensions
 ---------------------------
 
-* A NT_ARM_SVE note will be added to each coredump for each thread of the
-  dumped process.  The contents will be equivalent to the data that would have
-  been read if a PTRACE_GETREGSET of NT_ARM_SVE were executed for each thread
-  when the coredump was generated.
-
+* NT_ARM_SVE and NT_ARM_SSVE notes will be added to each coredump for
+  each thread of the dumped process.  The contents will be equivalent to the
+  data that would have been read if a PTRACE_GETREGSET of the corresponding
+  type were executed for each thread when the coredump was generated.
 
 9.  System runtime configuration
 --------------------------------
diff --git a/Documentation/cdrom/cdrom-standard.rst b/Documentation/cdrom/cdrom-standard.rst
index 52ea7b6b2fe8..7964fe134277 100644
--- a/Documentation/cdrom/cdrom-standard.rst
+++ b/Documentation/cdrom/cdrom-standard.rst
@@ -218,7 +218,6 @@ current *struct* is::
 		int (*tray_move)(struct cdrom_device_info *, int);
 		int (*lock_door)(struct cdrom_device_info *, int);
 		int (*select_speed)(struct cdrom_device_info *, int);
-		int (*select_disc)(struct cdrom_device_info *, int);
 		int (*get_last_session) (struct cdrom_device_info *,
 					 struct cdrom_multisession *);
 		int (*get_mcn)(struct cdrom_device_info *, struct cdrom_mcn *);
@@ -421,15 +420,6 @@ return value indicates an error.
 
 ::
 
-	int select_disc(struct cdrom_device_info *cdi, int number)
-
-If the drive can store multiple discs (a juke-box) this function
-will perform disc selection. It should return the number of the
-selected disc on success, a negative value on error. Currently, only
-the ide-cd driver supports this functionality.
-
-::
-
 	int get_last_session(struct cdrom_device_info *cdi,
 			     struct cdrom_multisession *ms_info)
 
diff --git a/Documentation/cdrom/ide-cd.rst b/Documentation/cdrom/ide-cd.rst
deleted file mode 100644
index bdccb74fc92d..000000000000
--- a/Documentation/cdrom/ide-cd.rst
+++ /dev/null
@@ -1,538 +0,0 @@
-IDE-CD driver documentation
-===========================
-
-:Originally by: scott snyder  <snyder@fnald0.fnal.gov> (19 May 1996)
-:Carrying on the torch is: Erik Andersen <andersee@debian.org>
-:New maintainers (19 Oct 1998): Jens Axboe <axboe@image.dk>
-
-1. Introduction
----------------
-
-The ide-cd driver should work with all ATAPI ver 1.2 to ATAPI 2.6 compliant
-CDROM drives which attach to an IDE interface.  Note that some CDROM vendors
-(including Mitsumi, Sony, Creative, Aztech, and Goldstar) have made
-both ATAPI-compliant drives and drives which use a proprietary
-interface.  If your drive uses one of those proprietary interfaces,
-this driver will not work with it (but one of the other CDROM drivers
-probably will).  This driver will not work with `ATAPI` drives which
-attach to the parallel port.  In addition, there is at least one drive
-(CyCDROM CR520ie) which attaches to the IDE port but is not ATAPI;
-this driver will not work with drives like that either (but see the
-aztcd driver).
-
-This driver provides the following features:
-
- - Reading from data tracks, and mounting ISO 9660 filesystems.
-
- - Playing audio tracks.  Most of the CDROM player programs floating
-   around should work; I usually use Workman.
-
- - Multisession support.
-
- - On drives which support it, reading digital audio data directly
-   from audio tracks.  The program cdda2wav can be used for this.
-   Note, however, that only some drives actually support this.
-
- - There is now support for CDROM changers which comply with the
-   ATAPI 2.6 draft standard (such as the NEC CDR-251).  This additional
-   functionality includes a function call to query which slot is the
-   currently selected slot, a function call to query which slots contain
-   CDs, etc. A sample program which demonstrates this functionality is
-   appended to the end of this file.  The Sanyo 3-disc changer
-   (which does not conform to the standard) is also now supported.
-   Please note the driver refers to the first CD as slot # 0.
-
-
-2. Installation
----------------
-
-0. The ide-cd relies on the ide disk driver.  See
-   Documentation/ide/ide.rst for up-to-date information on the ide
-   driver.
-
-1. Make sure that the ide and ide-cd drivers are compiled into the
-   kernel you're using.  When configuring the kernel, in the section
-   entitled "Floppy, IDE, and other block devices", say either `Y`
-   (which will compile the support directly into the kernel) or `M`
-   (to compile support as a module which can be loaded and unloaded)
-   to the options::
-
-      ATA/ATAPI/MFM/RLL support
-      Include IDE/ATAPI CDROM support
-
-   Depending on what type of IDE interface you have, you may need to
-   specify additional configuration options.  See
-   Documentation/ide/ide.rst.
-
-2. You should also ensure that the iso9660 filesystem is either
-   compiled into the kernel or available as a loadable module.  You
-   can see if a filesystem is known to the kernel by catting
-   /proc/filesystems.
-
-3. The CDROM drive should be connected to the host on an IDE
-   interface.  Each interface on a system is defined by an I/O port
-   address and an IRQ number, the standard assignments being
-   0x1f0 and 14 for the primary interface and 0x170 and 15 for the
-   secondary interface.  Each interface can control up to two devices,
-   where each device can be a hard drive, a CDROM drive, a floppy drive,
-   or a tape drive.  The two devices on an interface are called `master`
-   and `slave`; this is usually selectable via a jumper on the drive.
-
-   Linux names these devices as follows.  The master and slave devices
-   on the primary IDE interface are called `hda` and `hdb`,
-   respectively.  The drives on the secondary interface are called
-   `hdc` and `hdd`.  (Interfaces at other locations get other letters
-   in the third position; see Documentation/ide/ide.rst.)
-
-   If you want your CDROM drive to be found automatically by the
-   driver, you should make sure your IDE interface uses either the
-   primary or secondary addresses mentioned above.  In addition, if
-   the CDROM drive is the only device on the IDE interface, it should
-   be jumpered as `master`.  (If for some reason you cannot configure
-   your system in this manner, you can probably still use the driver.
-   You may have to pass extra configuration information to the kernel
-   when you boot, however.  See Documentation/ide/ide.rst for more
-   information.)
-
-4. Boot the system.  If the drive is recognized, you should see a
-   message which looks like::
-
-     hdb: NEC CD-ROM DRIVE:260, ATAPI CDROM drive
-
-   If you do not see this, see section 5 below.
-
-5. You may want to create a symbolic link /dev/cdrom pointing to the
-   actual device.  You can do this with the command::
-
-     ln -s  /dev/hdX  /dev/cdrom
-
-   where X should be replaced by the letter indicating where your
-   drive is installed.
-
-6. You should be able to see any error messages from the driver with
-   the `dmesg` command.
-
-
-3. Basic usage
---------------
-
-An ISO 9660 CDROM can be mounted by putting the disc in the drive and
-typing (as root)::
-
-  mount -t iso9660 /dev/cdrom /mnt/cdrom
-
-where it is assumed that /dev/cdrom is a link pointing to the actual
-device (as described in step 5 of the last section) and /mnt/cdrom is
-an empty directory.  You should now be able to see the contents of the
-CDROM under the /mnt/cdrom directory.  If you want to eject the CDROM,
-you must first dismount it with a command like::
-
-  umount /mnt/cdrom
-
-Note that audio CDs cannot be mounted.
-
-Some distributions set up /etc/fstab to always try to mount a CDROM
-filesystem on bootup.  It is not required to mount the CDROM in this
-manner, though, and it may be a nuisance if you change CDROMs often.
-You should feel free to remove the cdrom line from /etc/fstab and
-mount CDROMs manually if that suits you better.
-
-Multisession and photocd discs should work with no special handling.
-The hpcdtoppm package (ftp.gwdg.de:/pub/linux/hpcdtoppm/) may be
-useful for reading photocds.
-
-To play an audio CD, you should first unmount and remove any data
-CDROM.  Any of the CDROM player programs should then work (workman,
-workbone, cdplayer, etc.).
-
-On a few drives, you can read digital audio directly using a program
-such as cdda2wav.  The only types of drive which I've heard support
-this are Sony and Toshiba drives.  You will get errors if you try to
-use this function on a drive which does not support it.
-
-For supported changers, you can use the `cdchange` program (appended to
-the end of this file) to switch between changer slots.  Note that the
-drive should be unmounted before attempting this.  The program takes
-two arguments:  the CDROM device, and the slot number to which you wish
-to change.  If the slot number is -1, the drive is unloaded.
-
-
-4. Common problems
-------------------
-
-This section discusses some common problems encountered when trying to
-use the driver, and some possible solutions.  Note that if you are
-experiencing problems, you should probably also review
-Documentation/ide/ide.rst for current information about the underlying
-IDE support code.  Some of these items apply only to earlier versions
-of the driver, but are mentioned here for completeness.
-
-In most cases, you should probably check with `dmesg` for any errors
-from the driver.
-
-a. Drive is not detected during booting.
-
-   - Review the configuration instructions above and in
-     Documentation/ide/ide.rst, and check how your hardware is
-     configured.
-
-   - If your drive is the only device on an IDE interface, it should
-     be jumpered as master, if at all possible.
-
-   - If your IDE interface is not at the standard addresses of 0x170
-     or 0x1f0, you'll need to explicitly inform the driver using a
-     lilo option.  See Documentation/ide/ide.rst.  (This feature was
-     added around kernel version 1.3.30.)
-
-   - If the autoprobing is not finding your drive, you can tell the
-     driver to assume that one exists by using a lilo option of the
-     form `hdX=cdrom`, where X is the drive letter corresponding to
-     where your drive is installed.  Note that if you do this and you
-     see a boot message like::
-
-       hdX: ATAPI cdrom (?)
-
-     this does _not_ mean that the driver has successfully detected
-     the drive; rather, it means that the driver has not detected a
-     drive, but is assuming there's one there anyway because you told
-     it so.  If you actually try to do I/O to a drive defined at a
-     nonexistent or nonresponding I/O address, you'll probably get
-     errors with a status value of 0xff.
-
-   - Some IDE adapters require a nonstandard initialization sequence
-     before they'll function properly.  (If this is the case, there
-     will often be a separate MS-DOS driver just for the controller.)
-     IDE interfaces on sound cards often fall into this category.
-
-     Support for some interfaces needing extra initialization is
-     provided in later 1.3.x kernels.  You may need to turn on
-     additional kernel configuration options to get them to work;
-     see Documentation/ide/ide.rst.
-
-     Even if support is not available for your interface, you may be
-     able to get it to work with the following procedure.  First boot
-     MS-DOS and load the appropriate drivers.  Then warm-boot linux
-     (i.e., without powering off).  If this works, it can be automated
-     by running loadlin from the MS-DOS autoexec.
-
-
-b. Timeout/IRQ errors.
-
-  - If you always get timeout errors, interrupts from the drive are
-    probably not making it to the host.
-
-  - IRQ problems may also be indicated by the message
-    `IRQ probe failed (<n>)` while booting.  If <n> is zero, that
-    means that the system did not see an interrupt from the drive when
-    it was expecting one (on any feasible IRQ).  If <n> is negative,
-    that means the system saw interrupts on multiple IRQ lines, when
-    it was expecting to receive just one from the CDROM drive.
-
-  - Double-check your hardware configuration to make sure that the IRQ
-    number of your IDE interface matches what the driver expects.
-    (The usual assignments are 14 for the primary (0x1f0) interface
-    and 15 for the secondary (0x170) interface.)  Also be sure that
-    you don't have some other hardware which might be conflicting with
-    the IRQ you're using.  Also check the BIOS setup for your system;
-    some have the ability to disable individual IRQ levels, and I've
-    had one report of a system which was shipped with IRQ 15 disabled
-    by default.
-
-  - Note that many MS-DOS CDROM drivers will still function even if
-    there are hardware problems with the interrupt setup; they
-    apparently don't use interrupts.
-
-  - If you own a Pioneer DR-A24X, you _will_ get nasty error messages
-    on boot such as "irq timeout: status=0x50 { DriveReady SeekComplete }"
-    The Pioneer DR-A24X CDROM drives are fairly popular these days.
-    Unfortunately, these drives seem to become very confused when we perform
-    the standard Linux ATA disk drive probe. If you own one of these drives,
-    you can bypass the ATA probing which confuses these CDROM drives, by
-    adding `append="hdX=noprobe hdX=cdrom"` to your lilo.conf file and running
-    lilo (again where X is the drive letter corresponding to where your drive
-    is installed.)
-
-c. System hangups.
-
-  - If the system locks up when you try to access the CDROM, the most
-    likely cause is that you have a buggy IDE adapter which doesn't
-    properly handle simultaneous transactions on multiple interfaces.
-    The most notorious of these is the CMD640B chip.  This problem can
-    be worked around by specifying the `serialize` option when
-    booting.  Recent kernels should be able to detect the need for
-    this automatically in most cases, but the detection is not
-    foolproof.  See Documentation/ide/ide.rst for more information
-    about the `serialize` option and the CMD640B.
-
-  - Note that many MS-DOS CDROM drivers will work with such buggy
-    hardware, apparently because they never attempt to overlap CDROM
-    operations with other disk activity.
-
-
-d. Can't mount a CDROM.
-
-  - If you get errors from mount, it may help to check `dmesg` to see
-    if there are any more specific errors from the driver or from the
-    filesystem.
-
-  - Make sure there's a CDROM loaded in the drive, and that's it's an
-    ISO 9660 disc.  You can't mount an audio CD.
-
-  - With the CDROM in the drive and unmounted, try something like::
-
-      cat /dev/cdrom | od | more
-
-    If you see a dump, then the drive and driver are probably working
-    OK, and the problem is at the filesystem level (i.e., the CDROM is
-    not ISO 9660 or has errors in the filesystem structure).
-
-  - If you see `not a block device` errors, check that the definitions
-    of the device special files are correct.  They should be as
-    follows::
-
-      brw-rw----   1 root     disk       3,   0 Nov 11 18:48 /dev/hda
-      brw-rw----   1 root     disk       3,  64 Nov 11 18:48 /dev/hdb
-      brw-rw----   1 root     disk      22,   0 Nov 11 18:48 /dev/hdc
-      brw-rw----   1 root     disk      22,  64 Nov 11 18:48 /dev/hdd
-
-    Some early Slackware releases had these defined incorrectly.  If
-    these are wrong, you can remake them by running the script
-    scripts/MAKEDEV.ide.  (You may have to make it executable
-    with chmod first.)
-
-    If you have a /dev/cdrom symbolic link, check that it is pointing
-    to the correct device file.
-
-    If you hear people talking of the devices `hd1a` and `hd1b`, these
-    were old names for what are now called hdc and hdd.  Those names
-    should be considered obsolete.
-
-  - If mount is complaining that the iso9660 filesystem is not
-    available, but you know it is (check /proc/filesystems), you
-    probably need a newer version of mount.  Early versions would not
-    always give meaningful error messages.
-
-
-e. Directory listings are unpredictably truncated, and `dmesg` shows
-   `buffer botch` error messages from the driver.
-
-  - There was a bug in the version of the driver in 1.2.x kernels
-    which could cause this.  It was fixed in 1.3.0.  If you can't
-    upgrade, you can probably work around the problem by specifying a
-    blocksize of 2048 when mounting.  (Note that you won't be able to
-    directly execute binaries off the CDROM in that case.)
-
-    If you see this in kernels later than 1.3.0, please report it as a
-    bug.
-
-
-f. Data corruption.
-
-  - Random data corruption was occasionally observed with the Hitachi
-    CDR-7730 CDROM. If you experience data corruption, using "hdx=slow"
-    as a command line parameter may work around the problem, at the
-    expense of low system performance.
-
-
-5. cdchange.c
--------------
-
-::
-
-  /*
-   * cdchange.c  [-v]  <device>  [<slot>]
-   *
-   * This loads a CDROM from a specified slot in a changer, and displays
-   * information about the changer status.  The drive should be unmounted before
-   * using this program.
-   *
-   * Changer information is displayed if either the -v flag is specified
-   * or no slot was specified.
-   *
-   * Based on code originally from Gerhard Zuber <zuber@berlin.snafu.de>.
-   * Changer status information, and rewrite for the new Uniform CDROM driver
-   * interface by Erik Andersen <andersee@debian.org>.
-   */
-
-  #include <stdio.h>
-  #include <stdlib.h>
-  #include <errno.h>
-  #include <string.h>
-  #include <unistd.h>
-  #include <fcntl.h>
-  #include <sys/ioctl.h>
-  #include <linux/cdrom.h>
-
-
-  int
-  main (int argc, char **argv)
-  {
-	char *program;
-	char *device;
-	int fd;           /* file descriptor for CD-ROM device */
-	int status;       /* return status for system calls */
-	int verbose = 0;
-	int slot=-1, x_slot;
-	int total_slots_available;
-
-	program = argv[0];
-
-	++argv;
-	--argc;
-
-	if (argc < 1 || argc > 3) {
-		fprintf (stderr, "usage: %s [-v] <device> [<slot>]\n",
-			 program);
-		fprintf (stderr, "       Slots are numbered 1 -- n.\n");
-		exit (1);
-	}
-
-       if (strcmp (argv[0], "-v") == 0) {
-                verbose = 1;
-                ++argv;
-                --argc;
-        }
-
-	device = argv[0];
-
-	if (argc == 2)
-		slot = atoi (argv[1]) - 1;
-
-	/* open device */
-	fd = open(device, O_RDONLY | O_NONBLOCK);
-	if (fd < 0) {
-		fprintf (stderr, "%s: open failed for `%s`: %s\n",
-			 program, device, strerror (errno));
-		exit (1);
-	}
-
-	/* Check CD player status */
-	total_slots_available = ioctl (fd, CDROM_CHANGER_NSLOTS);
-	if (total_slots_available <= 1 ) {
-		fprintf (stderr, "%s: Device `%s` is not an ATAPI "
-			"compliant CD changer.\n", program, device);
-		exit (1);
-	}
-
-	if (slot >= 0) {
-		if (slot >= total_slots_available) {
-			fprintf (stderr, "Bad slot number.  "
-				 "Should be 1 -- %d.\n",
-				 total_slots_available);
-			exit (1);
-		}
-
-		/* load */
-		slot=ioctl (fd, CDROM_SELECT_DISC, slot);
-		if (slot<0) {
-			fflush(stdout);
-				perror ("CDROM_SELECT_DISC ");
-			exit(1);
-		}
-	}
-
-	if (slot < 0 || verbose) {
-
-		status=ioctl (fd, CDROM_SELECT_DISC, CDSL_CURRENT);
-		if (status<0) {
-			fflush(stdout);
-			perror (" CDROM_SELECT_DISC");
-			exit(1);
-		}
-		slot=status;
-
-		printf ("Current slot: %d\n", slot+1);
-		printf ("Total slots available: %d\n",
-			total_slots_available);
-
-		printf ("Drive status: ");
-                status = ioctl (fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
-                if (status<0) {
-                  perror(" CDROM_DRIVE_STATUS");
-                } else switch(status) {
-		case CDS_DISC_OK:
-			printf ("Ready.\n");
-			break;
-		case CDS_TRAY_OPEN:
-			printf ("Tray Open.\n");
-			break;
-		case CDS_DRIVE_NOT_READY:
-			printf ("Drive Not Ready.\n");
-			break;
-		default:
-			printf ("This Should not happen!\n");
-			break;
-		}
-
-		for (x_slot=0; x_slot<total_slots_available; x_slot++) {
-			printf ("Slot %2d: ", x_slot+1);
-			status = ioctl (fd, CDROM_DRIVE_STATUS, x_slot);
-			if (status<0) {
-			     perror(" CDROM_DRIVE_STATUS");
-			} else switch(status) {
-			case CDS_DISC_OK:
-				printf ("Disc present.");
-				break;
-			case CDS_NO_DISC:
-				printf ("Empty slot.");
-				break;
-			case CDS_TRAY_OPEN:
-				printf ("CD-ROM tray open.\n");
-				break;
-			case CDS_DRIVE_NOT_READY:
-				printf ("CD-ROM drive not ready.\n");
-				break;
-			case CDS_NO_INFO:
-				printf ("No Information available.");
-				break;
-			default:
-				printf ("This Should not happen!\n");
-				break;
-			}
-		  if (slot == x_slot) {
-                  status = ioctl (fd, CDROM_DISC_STATUS);
-                  if (status<0) {
-			perror(" CDROM_DISC_STATUS");
-                  }
-		  switch (status) {
-			case CDS_AUDIO:
-				printf ("\tAudio disc.\t");
-				break;
-			case CDS_DATA_1:
-			case CDS_DATA_2:
-				printf ("\tData disc type %d.\t", status-CDS_DATA_1+1);
-				break;
-			case CDS_XA_2_1:
-			case CDS_XA_2_2:
-				printf ("\tXA data disc type %d.\t", status-CDS_XA_2_1+1);
-				break;
-			default:
-				printf ("\tUnknown disc type 0x%x!\t", status);
-				break;
-			}
-			}
-			status = ioctl (fd, CDROM_MEDIA_CHANGED, x_slot);
-			if (status<0) {
-				perror(" CDROM_MEDIA_CHANGED");
-			}
-			switch (status) {
-			case 1:
-				printf ("Changed.\n");
-				break;
-			default:
-				printf ("\n");
-				break;
-			}
-		}
-	}
-
-	/* close device */
-	status = close (fd);
-	if (status != 0) {
-		fprintf (stderr, "%s: close failed for `%s`: %s\n",
-			 program, device, strerror (errno));
-		exit (1);
-	}
-
-	exit (0);
-  }
diff --git a/Documentation/cdrom/index.rst b/Documentation/cdrom/index.rst
index 338ad5f94e7c..e87a8785bc1a 100644
--- a/Documentation/cdrom/index.rst
+++ b/Documentation/cdrom/index.rst
@@ -8,7 +8,6 @@ cdrom
     :maxdepth: 1
 
     cdrom-standard
-    ide-cd
     packet-writing
 
 .. only::  subproject and html
diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst
index 972d46a5ddf6..dedd4d853329 100644
--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@@ -18,8 +18,10 @@ it.
 
    kernel-api
    workqueue
+   watch_queue
    printk-basics
    printk-formats
+   printk-index
    symbol-namespaces
 
 Data structures and low-level utilities
diff --git a/Documentation/core-api/printk-index.rst b/Documentation/core-api/printk-index.rst
new file mode 100644
index 000000000000..3062f37d119b
--- /dev/null
+++ b/Documentation/core-api/printk-index.rst
@@ -0,0 +1,137 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============
+Printk Index
+============
+
+There are many ways how to monitor the state of the system. One important
+source of information is the system log. It provides a lot of information,
+including more or less important warnings and error messages.
+
+There are monitoring tools that filter and take action based on messages
+logged.
+
+The kernel messages are evolving together with the code. As a result,
+particular kernel messages are not KABI and never will be!
+
+It is a huge challenge for maintaining the system log monitors. It requires
+knowing what messages were updated in a particular kernel version and why.
+Finding these changes in the sources would require non-trivial parsers.
+Also it would require matching the sources with the binary kernel which
+is not always trivial. Various changes might be backported. Various kernel
+versions might be used on different monitored systems.
+
+This is where the printk index feature might become useful. It provides
+a dump of printk formats used all over the source code used for the kernel
+and modules on the running system. It is accessible at runtime via debugfs.
+
+The printk index helps to find changes in the message formats. Also it helps
+to track the strings back to the kernel sources and the related commit.
+
+
+User Interface
+==============
+
+The index of printk formats are split in into separate files. The files are
+named according to the binaries where the printk formats are built-in. There
+is always "vmlinux" and optionally also modules, for example::
+
+   /sys/kernel/debug/printk/index/vmlinux
+   /sys/kernel/debug/printk/index/ext4
+   /sys/kernel/debug/printk/index/scsi_mod
+
+Note that only loaded modules are shown. Also printk formats from a module
+might appear in "vmlinux" when the module is built-in.
+
+The content is inspired by the dynamic debug interface and looks like::
+
+   $> head -1 /sys/kernel/debug/printk/index/vmlinux; shuf -n 5 vmlinux
+   # <level[,flags]> filename:line function "format"
+   <5> block/blk-settings.c:661 disk_stack_limits "%s: Warning: Device %s is misaligned\n"
+   <4> kernel/trace/trace.c:8296 trace_create_file "Could not create tracefs '%s' entry\n"
+   <6> arch/x86/kernel/hpet.c:144 _hpet_print_config "hpet: %s(%d):\n"
+   <6> init/do_mounts.c:605 prepare_namespace "Waiting for root device %s...\n"
+   <6> drivers/acpi/osl.c:1410 acpi_no_auto_serialize_setup "ACPI: auto-serialization disabled\n"
+
+, where the meaning is:
+
+   - :level: log level value: 0-7 for particular severity, -1 as default,
+	'c' as continuous line without an explicit log level
+   - :flags: optional flags: currently only 'c' for KERN_CONT
+   - :filename\:line: source filename and line number of the related
+	printk() call. Note that there are many wrappers, for example,
+	pr_warn(), pr_warn_once(), dev_warn().
+   - :function: function name where the printk() call is used.
+   - :format: format string
+
+The extra information makes it a bit harder to find differences
+between various kernels. Especially the line number might change
+very often. On the other hand, it helps a lot to confirm that
+it is the same string or find the commit that is responsible
+for eventual changes.
+
+
+printk() Is Not a Stable KABI
+=============================
+
+Several developers are afraid that exporting all these implementation
+details into the user space will transform particular printk() calls
+into KABI.
+
+But it is exactly the opposite. printk() calls must _not_ be KABI.
+And the printk index helps user space tools to deal with this.
+
+
+Subsystem specific printk wrappers
+==================================
+
+The printk index is generated using extra metadata that are stored in
+a dedicated .elf section ".printk_index". It is achieved using macro
+wrappers doing __printk_index_emit() together with the real printk()
+call. The same technique is used also for the metadata used by
+the dynamic debug feature.
+
+The metadata are stored for a particular message only when it is printed
+using these special wrappers. It is implemented for the commonly
+used printk() calls, including, for example, pr_warn(), or pr_once().
+
+Additional changes are necessary for various subsystem specific wrappers
+that call the original printk() via a common helper function. These needs
+their own wrappers adding __printk_index_emit().
+
+Only few subsystem specific wrappers have been updated so far,
+for example, dev_printk(). As a result, the printk formats from
+some subsystes can be missing in the printk index.
+
+
+Subsystem specific prefix
+=========================
+
+The macro pr_fmt() macro allows to define a prefix that is printed
+before the string generated by the related printk() calls.
+
+Subsystem specific wrappers usually add even more complicated
+prefixes.
+
+These prefixes can be stored into the printk index metadata
+by an optional parameter of __printk_index_emit(). The debugfs
+interface might then show the printk formats including these prefixes.
+For example, drivers/acpi/osl.c contains::
+
+  #define pr_fmt(fmt) "ACPI: OSL: " fmt
+
+  static int __init acpi_no_auto_serialize_setup(char *str)
+  {
+	acpi_gbl_auto_serialize_methods = FALSE;
+	pr_info("Auto-serialization disabled\n");
+
+	return 1;
+  }
+
+This results in the following printk index entry::
+
+  <6> drivers/acpi/osl.c:1410 acpi_no_auto_serialize_setup "ACPI: auto-serialization disabled\n"
+
+It helps matching messages from the real log with printk index.
+Then the source file name, line number, and function name can
+be used to match the string with the source code.
diff --git a/Documentation/core-api/timekeeping.rst b/Documentation/core-api/timekeeping.rst
index 729e24864fe7..22ec68f24421 100644
--- a/Documentation/core-api/timekeeping.rst
+++ b/Documentation/core-api/timekeeping.rst
@@ -132,6 +132,7 @@ Some additional variants exist for more specialized cases:
 .. c:function:: u64 ktime_get_mono_fast_ns( void )
 		u64 ktime_get_raw_fast_ns( void )
 		u64 ktime_get_boot_fast_ns( void )
+		u64 ktime_get_tai_fast_ns( void )
 		u64 ktime_get_real_fast_ns( void )
 
 	These variants are safe to call from any context, including from
diff --git a/Documentation/watch_queue.rst b/Documentation/core-api/watch_queue.rst
index 54f13ad5fc17..54f13ad5fc17 100644
--- a/Documentation/watch_queue.rst
+++ b/Documentation/core-api/watch_queue.rst
diff --git a/Documentation/dev-tools/ktap.rst b/Documentation/dev-tools/ktap.rst
index 5ee735c6687f..d0a9565b0f44 100644
--- a/Documentation/dev-tools/ktap.rst
+++ b/Documentation/dev-tools/ktap.rst
@@ -115,34 +115,32 @@ The diagnostic data field is optional, and results which have neither a
 directive nor any diagnostic data do not need to include the "#" field
 separator.
 
-Example result lines include:
-
-.. code-block:: none
+Example result lines include::
 
 	ok 1 test_case_name
 
 The test "test_case_name" passed.
 
-.. code-block:: none
+::
 
 	not ok 1 test_case_name
 
 The test "test_case_name" failed.
 
-.. code-block:: none
+::
 
 	ok 1 test # SKIP necessary dependency unavailable
 
 The test "test" was SKIPPED with the diagnostic message "necessary dependency
 unavailable".
 
-.. code-block:: none
+::
 
 	not ok 1 test # TIMEOUT 30 seconds
 
 The test "test" timed out, with diagnostic data "30 seconds".
 
-.. code-block:: none
+::
 
 	ok 5 check return code # rcode=0
 
@@ -202,7 +200,7 @@ allowed to be either indented or not indented.
 
 An example of a test with two nested subtests:
 
-.. code-block:: none
+::
 
 	KTAP version 1
 	1..1
@@ -215,7 +213,7 @@ An example of a test with two nested subtests:
 
 An example format with multiple levels of nested testing:
 
-.. code-block:: none
+::
 
 	KTAP version 1
 	1..2
@@ -250,7 +248,7 @@ nested version line, uses a line of the form
 
 Example KTAP output
 --------------------
-.. code-block:: none
+::
 
 	KTAP version 1
 	1..1
diff --git a/Documentation/dev-tools/kunit/api/index.rst b/Documentation/dev-tools/kunit/api/index.rst
index 3006cadcf44a..45ce04823f9f 100644
--- a/Documentation/dev-tools/kunit/api/index.rst
+++ b/Documentation/dev-tools/kunit/api/index.rst
@@ -6,6 +6,7 @@ API Reference
 .. toctree::
 
 	test
+	resource
 
 This section documents the KUnit kernel testing API. It is divided into the
 following sections:
@@ -13,3 +14,7 @@ following sections:
 Documentation/dev-tools/kunit/api/test.rst
 
  - documents all of the standard testing API
+
+Documentation/dev-tools/kunit/api/resource.rst
+
+ - documents the KUnit resource API
diff --git a/Documentation/dev-tools/kunit/api/resource.rst b/Documentation/dev-tools/kunit/api/resource.rst
new file mode 100644
index 000000000000..0a94f831259e
--- /dev/null
+++ b/Documentation/dev-tools/kunit/api/resource.rst
@@ -0,0 +1,13 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============
+Resource API
+============
+
+This file documents the KUnit resource API.
+
+Most users won't need to use this API directly, power users can use it to store
+state on a per-test basis, register custom cleanup actions, and more.
+
+.. kernel-doc:: include/kunit/resource.h
+   :internal:
diff --git a/Documentation/dev-tools/kunit/architecture.rst b/Documentation/dev-tools/kunit/architecture.rst
index ff9c85a0bff2..cf9e6e3eeae4 100644
--- a/Documentation/dev-tools/kunit/architecture.rst
+++ b/Documentation/dev-tools/kunit/architecture.rst
@@ -125,7 +125,7 @@ All expectations/assertions are formatted as:
 		  ``void __noreturn kunit_try_catch_throw(struct kunit_try_catch *try_catch)``.
 
 		- ``kunit_try_catch_throw`` calls function:
-		  ``void complete_and_exit(struct completion *, long) __noreturn;``
+		  ``void kthread_complete_and_exit(struct completion *, long) __noreturn;``
 		  and terminates the special thread context.
 
 - ``<op>`` denotes a check with options: ``TRUE`` (supplied property
diff --git a/Documentation/dev-tools/kunit/running_tips.rst b/Documentation/dev-tools/kunit/running_tips.rst
index 7b6d26a25959..c36f6760087d 100644
--- a/Documentation/dev-tools/kunit/running_tips.rst
+++ b/Documentation/dev-tools/kunit/running_tips.rst
@@ -114,6 +114,7 @@ Instead of enabling ``CONFIG_GCOV_KERNEL=y``, we can set these options:
 
 	CONFIG_DEBUG_KERNEL=y
 	CONFIG_DEBUG_INFO=y
+	CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 	CONFIG_GCOV=y
 
 
@@ -122,7 +123,7 @@ Putting it together into a copy-pastable sequence of commands:
 .. code-block:: bash
 
 	# Append coverage options to the current config
-	$ echo -e "CONFIG_DEBUG_KERNEL=y\nCONFIG_DEBUG_INFO=y\nCONFIG_GCOV=y" >> .kunit/.kunitconfig
+	$ echo -e "CONFIG_DEBUG_KERNEL=y\nCONFIG_DEBUG_INFO=y\nCONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y\nCONFIG_GCOV=y" >> .kunit/.kunitconfig
 	$ ./tools/testing/kunit/kunit.py run
 	# Extract the coverage information from the build dir (.kunit/)
 	$ lcov -t "my_kunit_tests" -o coverage.info -c -d .kunit/
diff --git a/Documentation/dev-tools/kunit/usage.rst b/Documentation/dev-tools/kunit/usage.rst
index 1c83e7d60a8a..d62a04255c2e 100644
--- a/Documentation/dev-tools/kunit/usage.rst
+++ b/Documentation/dev-tools/kunit/usage.rst
@@ -125,8 +125,8 @@ We need many test cases covering all the unit's behaviors. It is common to have
 many similar tests. In order to reduce duplication in these closely related
 tests, most unit testing frameworks (including KUnit) provide the concept of a
 *test suite*. A test suite is a collection of test cases for a unit of code
-with a setup function that gets invoked before every test case and then a tear
-down function that gets invoked after every test case completes. For example:
+with optional setup and teardown functions that run before/after the whole
+suite and/or every test case. For example:
 
 .. code-block:: c
 
@@ -141,16 +141,19 @@ down function that gets invoked after every test case completes. For example:
 		.name = "example",
 		.init = example_test_init,
 		.exit = example_test_exit,
+		.suite_init = example_suite_init,
+		.suite_exit = example_suite_exit,
 		.test_cases = example_test_cases,
 	};
 	kunit_test_suite(example_test_suite);
 
-In the above example, the test suite ``example_test_suite`` would run the test
-cases ``example_test_foo``, ``example_test_bar``, and ``example_test_baz``. Each
-would have ``example_test_init`` called immediately before it and
-``example_test_exit`` called immediately after it.
-``kunit_test_suite(example_test_suite)`` registers the test suite with the
-KUnit test framework.
+In the above example, the test suite ``example_test_suite`` would first run
+``example_suite_init``, then run the test cases ``example_test_foo``,
+``example_test_bar``, and ``example_test_baz``. Each would have
+``example_test_init`` called immediately before it and ``example_test_exit``
+called immediately after it. Finally, ``example_suite_exit`` would be called
+after everything else. ``kunit_test_suite(example_test_suite)`` registers the
+test suite with the KUnit test framework.
 
 .. note::
    A test case will only run if it is associated with a test suite.
diff --git a/Documentation/dev-tools/testing-overview.rst b/Documentation/dev-tools/testing-overview.rst
index 65feb81edb14..0aaf6ea53608 100644
--- a/Documentation/dev-tools/testing-overview.rst
+++ b/Documentation/dev-tools/testing-overview.rst
@@ -115,3 +115,66 @@ that none of these errors are occurring during the test.
 Some of these tools integrate with KUnit or kselftest and will
 automatically fail tests if an issue is detected.
 
+Static Analysis Tools
+=====================
+
+In addition to testing a running kernel, one can also analyze kernel source code
+directly (**at compile time**) using **static analysis** tools. The tools
+commonly used in the kernel allow one to inspect the whole source tree or just
+specific files within it. They make it easier to detect and fix problems during
+the development process.
+
+Sparse can help test the kernel by performing type-checking, lock checking,
+value range checking, in addition to reporting various errors and warnings while
+examining the code. See the Documentation/dev-tools/sparse.rst documentation
+page for details on how to use it.
+
+Smatch extends Sparse and provides additional checks for programming logic
+mistakes such as missing breaks in switch statements, unused return values on
+error checking, forgetting to set an error code in the return of an error path,
+etc. Smatch also has tests against more serious issues such as integer
+overflows, null pointer dereferences, and memory leaks. See the project page at
+http://smatch.sourceforge.net/.
+
+Coccinelle is another static analyzer at our disposal. Coccinelle is often used
+to aid refactoring and collateral evolution of source code, but it can also help
+to avoid certain bugs that occur in common code patterns. The types of tests
+available include API tests, tests for correct usage of kernel iterators, checks
+for the soundness of free operations, analysis of locking behavior, and further
+tests known to help keep consistent kernel usage. See the
+Documentation/dev-tools/coccinelle.rst documentation page for details.
+
+Beware, though, that static analysis tools suffer from **false positives**.
+Errors and warns need to be evaluated carefully before attempting to fix them.
+
+When to use Sparse and Smatch
+-----------------------------
+
+Sparse does type checking, such as verifying that annotated variables do not
+cause endianness bugs, detecting places that use ``__user`` pointers improperly,
+and analyzing the compatibility of symbol initializers.
+
+Smatch does flow analysis and, if allowed to build the function database, it
+also does cross function analysis. Smatch tries to answer questions like where
+is this buffer allocated? How big is it? Can this index be controlled by the
+user? Is this variable larger than that variable?
+
+It's generally easier to write checks in Smatch than it is to write checks in
+Sparse. Nevertheless, there are some overlaps between Sparse and Smatch checks.
+
+Strong points of Smatch and Coccinelle
+--------------------------------------
+
+Coccinelle is probably the easiest for writing checks. It works before the
+pre-processor so it's easier to check for bugs in macros using Coccinelle.
+Coccinelle also creates patches for you, which no other tool does.
+
+For example, with Coccinelle you can do a mass conversion from
+``kmalloc(x * size, GFP_KERNEL)`` to ``kmalloc_array(x, size, GFP_KERNEL)``, and
+that's really useful. If you just created a Smatch warning and try to push the
+work of converting on to the maintainers they would be annoyed. You'd have to
+argue about each warning if can really overflow or not.
+
+Coccinelle does no analysis of variable values, which is the strong point of
+Smatch. On the other hand, Coccinelle allows you to do simple things in a simple
+way.
diff --git a/Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml b/Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml
index c060c7914cae..c4e4a9eab658 100644
--- a/Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml
+++ b/Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml
@@ -26,6 +26,7 @@ properties:
       - items:
           - enum:
               - renesas,sata-r8a774b1     # RZ/G2N
+              - renesas,sata-r8a774e1     # RZ/G2H
               - renesas,sata-r8a7795      # R-Car H3
               - renesas,sata-r8a77965     # R-Car M3-N
           - const: renesas,rcar-gen3-sata # generic R-Car Gen3 or RZ/G2
diff --git a/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt b/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt
deleted file mode 100644
index 58fc8a6cebc7..000000000000
--- a/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt
+++ /dev/null
@@ -1,212 +0,0 @@
-* Rockchip rk3399 DMC (Dynamic Memory Controller) device
-
-Required properties:
-- compatible:		 Must be "rockchip,rk3399-dmc".
-- devfreq-events:	 Node to get DDR loading, Refer to
-			 Documentation/devicetree/bindings/devfreq/event/
-			 rockchip-dfi.txt
-- clocks:		 Phandles for clock specified in "clock-names" property
-- clock-names :		 The name of clock used by the DFI, must be
-			 "pclk_ddr_mon";
-- operating-points-v2:	 Refer to Documentation/devicetree/bindings/opp/opp-v2.yaml
-			 for details.
-- center-supply:	 DMC supply node.
-- status:		 Marks the node enabled/disabled.
-- rockchip,pmu:		 Phandle to the syscon managing the "PMU general register
-			 files".
-
-Optional properties:
-- interrupts:		 The CPU interrupt number. The interrupt specifier
-			 format depends on the interrupt controller.
-			 It should be a DCF interrupt. When DDR DVFS finishes
-			 a DCF interrupt is triggered.
-- rockchip,pmu:		 Phandle to the syscon managing the "PMU general register
-			 files".
-
-Following properties relate to DDR timing:
-
-- rockchip,dram_speed_bin :	  Value reference include/dt-bindings/clock/rk3399-ddr.h,
-				  it selects the DDR3 cl-trp-trcd type. It must be
-				  set according to "Speed Bin" in DDR3 datasheet,
-				  DO NOT use a smaller "Speed Bin" than specified
-				  for the DDR3 being used.
-
-- rockchip,pd_idle :		  Configure the PD_IDLE value. Defines the
-				  power-down idle period in which memories are
-				  placed into power-down mode if bus is idle
-				  for PD_IDLE DFI clock cycles.
-
-- rockchip,sr_idle :		  Configure the SR_IDLE value. Defines the
-				  self-refresh idle period in which memories are
-				  placed into self-refresh mode if bus is idle
-				  for SR_IDLE * 1024 DFI clock cycles (DFI
-				  clocks freq is half of DRAM clock), default
-				  value is "0".
-
-- rockchip,sr_mc_gate_idle :	  Defines the memory self-refresh and controller
-				  clock gating idle period. Memories are placed
-				  into self-refresh mode and memory controller
-				  clock arg gating started if bus is idle for
-				  sr_mc_gate_idle*1024 DFI clock cycles.
-
-- rockchip,srpd_lite_idle :	  Defines the self-refresh power down idle
-				  period in which memories are placed into
-				  self-refresh power down mode if bus is idle
-				  for srpd_lite_idle * 1024 DFI clock cycles.
-				  This parameter is for LPDDR4 only.
-
-- rockchip,standby_idle :	  Defines the standby idle period in which
-				  memories are placed into self-refresh mode.
-				  The controller, pi, PHY and DRAM clock will
-				  be gated if bus is idle for standby_idle * DFI
-				  clock cycles.
-
-- rockchip,dram_dll_dis_freq :	  Defines the DDR3 DLL bypass frequency in MHz.
-				  When DDR frequency is less than DRAM_DLL_DISB_FREQ,
-				  DDR3 DLL will be bypassed. Note: if DLL was bypassed,
-				  the odt will also stop working.
-
-- rockchip,phy_dll_dis_freq :	  Defines the PHY dll bypass frequency in
-				  MHz (Mega Hz). When DDR frequency is less than
-				  DRAM_DLL_DISB_FREQ, PHY DLL will be bypassed.
-				  Note: PHY DLL and PHY ODT are independent.
-
-- rockchip,ddr3_odt_dis_freq :	  When the DRAM type is DDR3, this parameter defines
-				  the ODT disable frequency in MHz (Mega Hz).
-				  when the DDR frequency is  less then ddr3_odt_dis_freq,
-				  the ODT on the DRAM side and controller side are
-				  both disabled.
-
-- rockchip,ddr3_drv :		  When the DRAM type is DDR3, this parameter defines
-				  the DRAM side driver strength in ohms. Default
-				  value is 40.
-
-- rockchip,ddr3_odt :		  When the DRAM type is DDR3, this parameter defines
-				  the DRAM side ODT strength in ohms. Default value
-				  is 120.
-
-- rockchip,phy_ddr3_ca_drv :	  When the DRAM type is DDR3, this parameter defines
-				  the phy side CA line (incluing command line,
-				  address line and clock line) driver strength.
-				  Default value is 40.
-
-- rockchip,phy_ddr3_dq_drv :	  When the DRAM type is DDR3, this parameter defines
-				  the PHY side DQ line (including DQS/DQ/DM line)
-				  driver strength. Default value is 40.
-
-- rockchip,phy_ddr3_odt : 	  When the DRAM type is DDR3, this parameter defines
-				  the PHY side ODT strength. Default value is 240.
-
-- rockchip,lpddr3_odt_dis_freq : When the DRAM type is LPDDR3, this parameter defines
-				  then ODT disable frequency in MHz (Mega Hz).
-				  When DDR frequency is less then ddr3_odt_dis_freq,
-				  the ODT on the DRAM side and controller side are
-				  both disabled.
-
-- rockchip,lpddr3_drv :		  When the DRAM type is LPDDR3, this parameter defines
-				  the DRAM side driver strength in ohms. Default
-				  value is 34.
-
-- rockchip,lpddr3_odt :		  When the DRAM type is LPDDR3, this parameter defines
-				  the DRAM side ODT strength in ohms. Default value
-				  is 240.
-
-- rockchip,phy_lpddr3_ca_drv :	  When the DRAM type is LPDDR3, this parameter defines
-				  the PHY side CA line (including command line,
-				  address line and clock line) driver strength.
-				  Default value is 40.
-
-- rockchip,phy_lpddr3_dq_drv :	  When the DRAM type is LPDDR3, this parameter defines
-				  the PHY side DQ line (including DQS/DQ/DM line)
-				  driver strength. Default value is 40.
-
-- rockchip,phy_lpddr3_odt : 	  When dram type is LPDDR3, this parameter define
-				  the phy side odt strength, default value is 240.
-
-- rockchip,lpddr4_odt_dis_freq : When the DRAM type is LPDDR4, this parameter
-				  defines the ODT disable frequency in
-				  MHz (Mega Hz). When the DDR frequency is less then
-				  ddr3_odt_dis_freq, the ODT on the DRAM side and
-				  controller side are both disabled.
-
-- rockchip,lpddr4_drv :		  When the DRAM type is LPDDR4, this parameter defines
-				  the DRAM side driver strength in ohms. Default
-				  value is 60.
-
-- rockchip,lpddr4_dq_odt : 	  When the DRAM type is LPDDR4, this parameter defines
-				  the DRAM side ODT on DQS/DQ line strength in ohms.
-				  Default value is 40.
-
-- rockchip,lpddr4_ca_odt :	  When the DRAM type is LPDDR4, this parameter defines
-				  the DRAM side ODT on CA line strength in ohms.
-				  Default value is 40.
-
-- rockchip,phy_lpddr4_ca_drv :	  When the DRAM type is LPDDR4, this parameter defines
-				  the PHY side CA line (including command address
-				  line) driver strength. Default value is 40.
-
-- rockchip,phy_lpddr4_ck_cs_drv : When the DRAM type is LPDDR4, this parameter defines
-				  the PHY side clock line and CS line driver
-				  strength. Default value is 80.
-
-- rockchip,phy_lpddr4_dq_drv :	  When the DRAM type is LPDDR4, this parameter defines
-				  the PHY side DQ line (including DQS/DQ/DM line)
-				  driver strength. Default value is 80.
-
-- rockchip,phy_lpddr4_odt :	  When the DRAM type is LPDDR4, this parameter defines
-				  the PHY side ODT strength. Default value is 60.
-
-Example:
-	dmc_opp_table: dmc_opp_table {
-		compatible = "operating-points-v2";
-
-		opp00 {
-			opp-hz = /bits/ 64 <300000000>;
-			opp-microvolt = <900000>;
-		};
-		opp01 {
-			opp-hz = /bits/ 64 <666000000>;
-			opp-microvolt = <900000>;
-		};
-	};
-
-	dmc: dmc {
-		compatible = "rockchip,rk3399-dmc";
-		devfreq-events = <&dfi>;
-		interrupts = <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&cru SCLK_DDRC>;
-		clock-names = "dmc_clk";
-		operating-points-v2 = <&dmc_opp_table>;
-		center-supply = <&ppvar_centerlogic>;
-		upthreshold = <15>;
-		downdifferential = <10>;
-		rockchip,ddr3_speed_bin = <21>;
-		rockchip,pd_idle = <0x40>;
-		rockchip,sr_idle = <0x2>;
-		rockchip,sr_mc_gate_idle = <0x3>;
-		rockchip,srpd_lite_idle	= <0x4>;
-		rockchip,standby_idle = <0x2000>;
-		rockchip,dram_dll_dis_freq = <300>;
-		rockchip,phy_dll_dis_freq = <125>;
-		rockchip,auto_pd_dis_freq = <666>;
-		rockchip,ddr3_odt_dis_freq = <333>;
-		rockchip,ddr3_drv = <40>;
-		rockchip,ddr3_odt = <120>;
-		rockchip,phy_ddr3_ca_drv = <40>;
-		rockchip,phy_ddr3_dq_drv = <40>;
-		rockchip,phy_ddr3_odt = <240>;
-		rockchip,lpddr3_odt_dis_freq = <333>;
-		rockchip,lpddr3_drv = <34>;
-		rockchip,lpddr3_odt = <240>;
-		rockchip,phy_lpddr3_ca_drv = <40>;
-		rockchip,phy_lpddr3_dq_drv = <40>;
-		rockchip,phy_lpddr3_odt = <240>;
-		rockchip,lpddr4_odt_dis_freq = <333>;
-		rockchip,lpddr4_drv = <60>;
-		rockchip,lpddr4_dq_odt = <40>;
-		rockchip,lpddr4_ca_odt = <40>;
-		rockchip,phy_lpddr4_ca_drv = <40>;
-		rockchip,phy_lpddr4_ck_cs_drv = <80>;
-		rockchip,phy_lpddr4_dq_drv = <80>;
-		rockchip,phy_lpddr4_odt = <60>;
-	};
diff --git a/Documentation/devicetree/bindings/hwmon/adt7475.yaml b/Documentation/devicetree/bindings/hwmon/adt7475.yaml
index 7d9c083632b9..22beb37f1bf1 100644
--- a/Documentation/devicetree/bindings/hwmon/adt7475.yaml
+++ b/Documentation/devicetree/bindings/hwmon/adt7475.yaml
@@ -61,6 +61,26 @@ patternProperties:
     $ref: /schemas/types.yaml#/definitions/uint32
     enum: [0, 1]
 
+  "adi,pin(5|10)-function":
+    description: |
+      Configures the function for pin 5 on the adi,adt7473 and adi,adt7475. Or
+      pin 10 on the adi,adt7476 and adi,adt7490.
+    $ref: /schemas/types.yaml#/definitions/string
+    enum:
+      - pwm2
+      - smbalert#
+
+  "adi,pin(9|14)-function":
+    description: |
+      Configures the function for pin 9 on the adi,adt7473 and adi,adt7475. Or
+      pin 14 on the adi,adt7476 and adi,adt7490
+    $ref: /schemas/types.yaml#/definitions/string
+    enum:
+      - tach4
+      - therm#
+      - smbalert#
+      - gpio
+
 required:
   - compatible
   - reg
@@ -79,6 +99,8 @@ examples:
         adi,bypass-attenuator-in0 = <1>;
         adi,bypass-attenuator-in1 = <0>;
         adi,pwm-active-state = <1 0 1>;
+        adi,pin10-function = "smbalert#";
+        adi,pin14-function = "tach4";
       };
     };
 
diff --git a/Documentation/devicetree/bindings/hwmon/lm75.yaml b/Documentation/devicetree/bindings/hwmon/lm75.yaml
index 72980d083c21..8226e3b5d028 100644
--- a/Documentation/devicetree/bindings/hwmon/lm75.yaml
+++ b/Documentation/devicetree/bindings/hwmon/lm75.yaml
@@ -14,6 +14,7 @@ properties:
   compatible:
     enum:
       - adi,adt75
+      - atmel,at30ts74
       - dallas,ds1775
       - dallas,ds75
       - dallas,ds7505
diff --git a/Documentation/devicetree/bindings/hwmon/microchip,lan966x.yaml b/Documentation/devicetree/bindings/hwmon/microchip,lan966x.yaml
new file mode 100644
index 000000000000..390dd6755ff5
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwmon/microchip,lan966x.yaml
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/hwmon/microchip,lan966x.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Microchip LAN966x Hardware Monitor
+
+maintainers:
+  - Michael Walle <michael@walle.cc>
+
+description: |
+  Microchip LAN966x temperature monitor and fan controller
+
+properties:
+  compatible:
+    enum:
+      - microchip,lan9668-hwmon
+
+  reg:
+    items:
+      - description: PVT registers
+      - description: FAN registers
+
+  reg-names:
+    items:
+      - const: pvt
+      - const: fan
+
+  clocks:
+    maxItems: 1
+
+  '#thermal-sensor-cells':
+    const: 0
+
+required:
+  - compatible
+  - reg
+  - reg-names
+  - clocks
+
+additionalProperties: false
+
+examples:
+  - |
+    hwmon: hwmon@e2010180 {
+        compatible = "microchip,lan9668-hwmon";
+        reg = <0xe2010180 0xc>,
+              <0xe20042a8 0xc>;
+        reg-names = "pvt", "fan";
+        clocks = <&sys_clk>;
+        #thermal-sensor-cells = <0>;
+    };
diff --git a/Documentation/devicetree/bindings/hwmon/national,lm90.yaml b/Documentation/devicetree/bindings/hwmon/national,lm90.yaml
index 30db92977937..b04657849852 100644
--- a/Documentation/devicetree/bindings/hwmon/national,lm90.yaml
+++ b/Documentation/devicetree/bindings/hwmon/national,lm90.yaml
@@ -34,6 +34,7 @@ properties:
       - nxp,sa56004
       - onnn,nct1008
       - ti,tmp451
+      - ti,tmp461
       - winbond,w83l771
 
 
@@ -52,10 +53,29 @@ properties:
   vcc-supply:
     description: phandle to the regulator that provides the +VCC supply
 
+  ti,extended-range-enable:
+    description: Set to enable extended range temperature.
+    type: boolean
+
 required:
   - compatible
   - reg
 
+allOf:
+  - if:
+      not:
+        properties:
+          compatible:
+            contains:
+              enum:
+                - adi,adt7461
+                - adi,adt7461a
+                - ti,tmp451
+                - ti,tmp461
+    then:
+      properties:
+        ti,extended-range-enable: false
+
 additionalProperties: false
 
 examples:
diff --git a/Documentation/devicetree/bindings/hwmon/nuvoton,nct6775.yaml b/Documentation/devicetree/bindings/hwmon/nuvoton,nct6775.yaml
new file mode 100644
index 000000000000..358b262431fc
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwmon/nuvoton,nct6775.yaml
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+
+$id: http://devicetree.org/schemas/hwmon/nuvoton,nct6775.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Nuvoton NCT6775 and compatible Super I/O chips
+
+maintainers:
+  - Zev Weiss <zev@bewilderbeest.net>
+
+properties:
+  compatible:
+    enum:
+      - nuvoton,nct6106
+      - nuvoton,nct6116
+      - nuvoton,nct6775
+      - nuvoton,nct6776
+      - nuvoton,nct6779
+      - nuvoton,nct6791
+      - nuvoton,nct6792
+      - nuvoton,nct6793
+      - nuvoton,nct6795
+      - nuvoton,nct6796
+      - nuvoton,nct6797
+      - nuvoton,nct6798
+
+  reg:
+    maxItems: 1
+
+  nuvoton,tsi-channel-mask:
+    description:
+      Bitmask indicating which TSI temperature sensor channels are
+      active.  LSB is TSI0, bit 1 is TSI1, etc.
+    $ref: /schemas/types.yaml#/definitions/uint32
+    maximum: 0xff
+    default: 0
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        superio@4d {
+            compatible = "nuvoton,nct6779";
+            reg = <0x4d>;
+            nuvoton,tsi-channel-mask = <0x03>;
+        };
+    };
diff --git a/Documentation/devicetree/bindings/hwmon/ti,tmp401.yaml b/Documentation/devicetree/bindings/hwmon/ti,tmp401.yaml
new file mode 100644
index 000000000000..fe0ac08faa1a
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwmon/ti,tmp401.yaml
@@ -0,0 +1,105 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/hwmon/ti,tmp401.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TMP401, TPM411 and TMP43x temperature sensor
+
+maintainers:
+  - Guenter Roeck <linux@roeck-us.net>
+
+description: |
+  ±1°C Remote and Local temperature sensor
+
+  Datasheets:
+  https://www.ti.com/lit/ds/symlink/tmp401.pdf
+  https://www.ti.com/lit/ds/symlink/tmp411.pdf
+  https://www.ti.com/lit/ds/symlink/tmp431.pdf
+  https://www.ti.com/lit/ds/symlink/tmp435.pdf
+
+properties:
+  compatible:
+    enum:
+      - ti,tmp401
+      - ti,tmp411
+      - ti,tmp431
+      - ti,tmp432
+      - ti,tmp435
+
+  reg:
+    maxItems: 1
+
+  ti,extended-range-enable:
+    description:
+      When set, this sensor measures over extended temperature range.
+    type: boolean
+
+  ti,n-factor:
+    description:
+      value to be used for converting remote channel measurements to
+      temperature.
+    $ref: /schemas/types.yaml#/definitions/int32
+    items:
+      minimum: -128
+      maximum: 127
+
+  ti,beta-compensation:
+    description:
+      value to select beta correction range.
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 0
+    maximum: 15
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - ti,tmp401
+    then:
+      properties:
+        ti,n-factor: false
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - ti,tmp401
+              - ti,tmp411
+    then:
+      properties:
+        ti,beta-compensation: false
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    i2c {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      sensor@4c {
+        compatible = "ti,tmp401";
+        reg = <0x4c>;
+      };
+    };
+  - |
+    i2c {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      sensor@4c {
+        compatible = "ti,tmp431";
+        reg = <0x4c>;
+        ti,extended-range-enable;
+        ti,n-factor = <0x3b>;
+        ti,beta-compensation = <0x7>;
+      };
+    };
diff --git a/Documentation/devicetree/bindings/input/mediatek,mt6779-keypad.yaml b/Documentation/devicetree/bindings/input/mediatek,mt6779-keypad.yaml
index b1770640f94b..03ebd2665d07 100644
--- a/Documentation/devicetree/bindings/input/mediatek,mt6779-keypad.yaml
+++ b/Documentation/devicetree/bindings/input/mediatek,mt6779-keypad.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Mediatek's Keypad Controller device tree bindings
 
 maintainers:
-  - Fengping Yu <fengping.yu@mediatek.com>
+  - Mattijs Korpershoek <mkorpershoek@baylibre.com>
 
 allOf:
   - $ref: "/schemas/input/matrix-keymap.yaml#"
diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml
index b7197f78e158..3912a89162f0 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: ARM Generic Interrupt Controller, version 3
 
 maintainers:
-  - Marc Zyngier <marc.zyngier@arm.com>
+  - Marc Zyngier <maz@kernel.org>
 
 description: |
   AArch64 SMP cores are often associated with a GICv3, providing Private
@@ -78,7 +78,11 @@ properties:
       - GIC Hypervisor interface (GICH)
       - GIC Virtual CPU interface (GICV)
 
-      GICC, GICH and GICV are optional.
+      GICC, GICH and GICV are optional, but must be described if the CPUs
+      support them. Examples of such CPUs are ARM's implementations of the
+      ARMv8.0 architecture such as Cortex-A32, A34, A35, A53, A57, A72 and
+      A73 (this list is not exhaustive).
+
     minItems: 2
     maxItems: 4096   # Should be enough?
 
diff --git a/Documentation/devicetree/bindings/media/i2c/dongwoon,dw9807-vcm.txt b/Documentation/devicetree/bindings/media/i2c/dongwoon,dw9807-vcm.txt
deleted file mode 100644
index c4701f1eaaf6..000000000000
--- a/Documentation/devicetree/bindings/media/i2c/dongwoon,dw9807-vcm.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-Dongwoon Anatech DW9807 voice coil lens driver
-
-DW9807 is a 10-bit DAC with current sink capability. It is intended for
-controlling voice coil lenses.
-
-Mandatory properties:
-
-- compatible: "dongwoon,dw9807-vcm"
-- reg: I2C slave address
diff --git a/Documentation/devicetree/bindings/media/i2c/dongwoon,dw9807-vcm.yaml b/Documentation/devicetree/bindings/media/i2c/dongwoon,dw9807-vcm.yaml
new file mode 100644
index 000000000000..aae246ca3fcf
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/i2c/dongwoon,dw9807-vcm.yaml
@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) 2018, 2021 Intel Corporation
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/media/i2c/dongwoon,dw9807-vcm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Dongwoon Anatech DW9807 voice coil lens driver
+
+maintainers:
+  - Sakari Ailus <sakari.ailus@linux.intel.com>
+
+description: |
+  DW9807 is a 10-bit DAC with current sink capability. It is intended for
+  controlling voice coil lenses.
+
+properties:
+  compatible:
+    const: dongwoon,dw9807-vcm
+
+  reg:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        lens@e {
+            compatible = "dongwoon,dw9807-vcm";
+            reg = <0x0e>;
+        };
+    };
+...
diff --git a/Documentation/devicetree/bindings/media/i2c/sony,imx412.yaml b/Documentation/devicetree/bindings/media/i2c/sony,imx412.yaml
index afcf70947f7e..26d1807d0bb6 100644
--- a/Documentation/devicetree/bindings/media/i2c/sony,imx412.yaml
+++ b/Documentation/devicetree/bindings/media/i2c/sony,imx412.yaml
@@ -32,6 +32,15 @@ properties:
     description: Clock frequency 6MHz, 12MHz, 18MHz, 24MHz or 27MHz
     maxItems: 1
 
+  dovdd-supply:
+    description: Interface power supply.
+
+  avdd-supply:
+    description: Analog power supply.
+
+  dvdd-supply:
+    description: Digital power supply.
+
   reset-gpios:
     description: Reference to the GPIO connected to the XCLR pin, if any.
     maxItems: 1
diff --git a/Documentation/devicetree/bindings/media/mediatek,vcodec-encoder.yaml b/Documentation/devicetree/bindings/media/mediatek,vcodec-encoder.yaml
index deb5b657a2d5..d36fcca04cbc 100644
--- a/Documentation/devicetree/bindings/media/mediatek,vcodec-encoder.yaml
+++ b/Documentation/devicetree/bindings/media/mediatek,vcodec-encoder.yaml
@@ -63,6 +63,9 @@ properties:
     description:
       Describes point to scp.
 
+  power-domains:
+    maxItems: 1
+
 required:
   - compatible
   - reg
diff --git a/Documentation/devicetree/bindings/media/mediatek,vcodec-subdev-decoder.yaml b/Documentation/devicetree/bindings/media/mediatek,vcodec-subdev-decoder.yaml
index c73bf2352aca..440646e44c0d 100644
--- a/Documentation/devicetree/bindings/media/mediatek,vcodec-subdev-decoder.yaml
+++ b/Documentation/devicetree/bindings/media/mediatek,vcodec-subdev-decoder.yaml
@@ -47,7 +47,9 @@ description: |
 
 properties:
   compatible:
-    const: mediatek,mt8192-vcodec-dec
+    enum:
+      - mediatek,mt8192-vcodec-dec
+      - mediatek,mt8186-vcodec-dec
 
   reg:
     maxItems: 1
diff --git a/Documentation/devicetree/bindings/media/microchip,xisc.yaml b/Documentation/devicetree/bindings/media/microchip,xisc.yaml
index 086e1430af4f..3be8f64c3e21 100644
--- a/Documentation/devicetree/bindings/media/microchip,xisc.yaml
+++ b/Documentation/devicetree/bindings/media/microchip,xisc.yaml
@@ -67,7 +67,7 @@ properties:
           remote-endpoint: true
 
           bus-width:
-            enum: [8, 9, 10, 11, 12]
+            enum: [8, 9, 10, 11, 12, 14]
             default: 12
 
           hsync-active:
diff --git a/Documentation/devicetree/bindings/media/rockchip,vdec.yaml b/Documentation/devicetree/bindings/media/rockchip,vdec.yaml
index 089f11d21b25..3bcfb8e12333 100644
--- a/Documentation/devicetree/bindings/media/rockchip,vdec.yaml
+++ b/Documentation/devicetree/bindings/media/rockchip,vdec.yaml
@@ -18,7 +18,9 @@ properties:
     oneOf:
       - const: rockchip,rk3399-vdec
       - items:
-          - const: rockchip,rk3228-vdec
+          - enum:
+              - rockchip,rk3228-vdec
+              - rockchip,rk3328-vdec
           - const: rockchip,rk3399-vdec
 
   reg:
diff --git a/Documentation/devicetree/bindings/media/rockchip-vpu.yaml b/Documentation/devicetree/bindings/media/rockchip-vpu.yaml
index bacb60a34989..6cc4d3e5a61d 100644
--- a/Documentation/devicetree/bindings/media/rockchip-vpu.yaml
+++ b/Documentation/devicetree/bindings/media/rockchip-vpu.yaml
@@ -23,6 +23,7 @@ properties:
           - rockchip,rk3328-vpu
           - rockchip,rk3399-vpu
           - rockchip,px30-vpu
+          - rockchip,rk3568-vpu
       - items:
           - const: rockchip,rk3188-vpu
           - const: rockchip,rk3066-vpu
diff --git a/Documentation/devicetree/bindings/media/video-interfaces.yaml b/Documentation/devicetree/bindings/media/video-interfaces.yaml
index 4391dce2caee..68c3b9871cf3 100644
--- a/Documentation/devicetree/bindings/media/video-interfaces.yaml
+++ b/Documentation/devicetree/bindings/media/video-interfaces.yaml
@@ -93,6 +93,7 @@ properties:
       - 4 # MIPI CSI-2 D-PHY
       - 5 # Parallel
       - 6 # BT.656
+      - 7 # DPI
     description:
       Data bus type.
 
diff --git a/Documentation/devicetree/bindings/memory-controllers/fsl/fsl,ddr.yaml b/Documentation/devicetree/bindings/memory-controllers/fsl/fsl,ddr.yaml
index af5147f9da72..84f778a99546 100644
--- a/Documentation/devicetree/bindings/memory-controllers/fsl/fsl,ddr.yaml
+++ b/Documentation/devicetree/bindings/memory-controllers/fsl/fsl,ddr.yaml
@@ -25,12 +25,6 @@ properties:
           - const: fsl,qoriq-memory-controller
       - enum:
           - fsl,bsc9132-memory-controller
-          - fsl,8540-memory-controller
-          - fsl,8541-memory-controller
-          - fsl,8544-memory-controller
-          - fsl,8548-memory-controller
-          - fsl,8555-memory-controller
-          - fsl,8568-memory-controller
           - fsl,mpc8536-memory-controller
           - fsl,mpc8540-memory-controller
           - fsl,mpc8541-memory-controller
diff --git a/Documentation/devicetree/bindings/memory-controllers/rockchip,rk3399-dmc.yaml b/Documentation/devicetree/bindings/memory-controllers/rockchip,rk3399-dmc.yaml
new file mode 100644
index 000000000000..fb4920397d08
--- /dev/null
+++ b/Documentation/devicetree/bindings/memory-controllers/rockchip,rk3399-dmc.yaml
@@ -0,0 +1,384 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# %YAML 1.2
+---
+$id: http://devicetree.org/schemas/memory-controllers/rockchip,rk3399-dmc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip rk3399 DMC (Dynamic Memory Controller) device
+
+maintainers:
+  - Brian Norris <briannorris@chromium.org>
+
+properties:
+  compatible:
+    enum:
+      - rockchip,rk3399-dmc
+
+  devfreq-events:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description:
+      Node to get DDR loading. Refer to
+      Documentation/devicetree/bindings/devfreq/event/rockchip-dfi.txt.
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    items:
+      - const: dmc_clk
+
+  operating-points-v2: true
+
+  center-supply:
+    description:
+      DMC regulator supply.
+
+  rockchip,pmu:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description:
+      Phandle to the syscon managing the "PMU general register files".
+
+  interrupts:
+    maxItems: 1
+    description:
+      The CPU interrupt number. It should be a DCF interrupt. When DDR DVFS
+      finishes, a DCF interrupt is triggered.
+
+  rockchip,ddr3_speed_bin:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      For values, reference include/dt-bindings/clock/rk3399-ddr.h. Selects the
+      DDR3 cl-trp-trcd type. It must be set according to "Speed Bin" in DDR3
+      datasheet; DO NOT use a smaller "Speed Bin" than specified for the DDR3
+      being used.
+
+  rockchip,pd_idle:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      Configure the PD_IDLE value. Defines the power-down idle period in which
+      memories are placed into power-down mode if bus is idle for PD_IDLE DFI
+      clock cycles.
+      See also rockchip,pd-idle-ns.
+
+  rockchip,sr_idle:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      Configure the SR_IDLE value. Defines the self-refresh idle period in
+      which memories are placed into self-refresh mode if bus is idle for
+      SR_IDLE * 1024 DFI clock cycles (DFI clocks freq is half of DRAM clock).
+      See also rockchip,sr-idle-ns.
+    default: 0
+
+  rockchip,sr_mc_gate_idle:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      Defines the memory self-refresh and controller clock gating idle period.
+      Memories are placed into self-refresh mode and memory controller clock
+      arg gating started if bus is idle for sr_mc_gate_idle*1024 DFI clock
+      cycles.
+      See also rockchip,sr-mc-gate-idle-ns.
+
+  rockchip,srpd_lite_idle:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      Defines the self-refresh power down idle period in which memories are
+      placed into self-refresh power down mode if bus is idle for
+      srpd_lite_idle * 1024 DFI clock cycles. This parameter is for LPDDR4
+      only.
+      See also rockchip,srpd-lite-idle-ns.
+
+  rockchip,standby_idle:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      Defines the standby idle period in which memories are placed into
+      self-refresh mode. The controller, pi, PHY and DRAM clock will be gated
+      if bus is idle for standby_idle * DFI clock cycles.
+      See also rockchip,standby-idle-ns.
+
+  rockchip,dram_dll_dis_freq:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: |
+      Defines the DDR3 DLL bypass frequency in MHz. When DDR frequency is less
+      than DRAM_DLL_DISB_FREQ, DDR3 DLL will be bypassed.
+      Note: if DLL was bypassed, the odt will also stop working.
+
+  rockchip,phy_dll_dis_freq:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: |
+      Defines the PHY dll bypass frequency in MHz (Mega Hz). When DDR frequency
+      is less than DRAM_DLL_DISB_FREQ, PHY DLL will be bypassed.
+      Note: PHY DLL and PHY ODT are independent.
+
+  rockchip,auto_pd_dis_freq:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      Defines the auto PD disable frequency in MHz.
+
+  rockchip,ddr3_odt_dis_freq:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 1000000  # In case anyone thought this was MHz.
+    description:
+      When the DRAM type is DDR3, this parameter defines the ODT disable
+      frequency in Hz. When the DDR frequency is less then ddr3_odt_dis_freq,
+      the ODT on the DRAM side and controller side are both disabled.
+
+  rockchip,ddr3_drv:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When the DRAM type is DDR3, this parameter defines the DRAM side drive
+      strength in ohms.
+    default: 40
+
+  rockchip,ddr3_odt:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When the DRAM type is DDR3, this parameter defines the DRAM side ODT
+      strength in ohms.
+    default: 120
+
+  rockchip,phy_ddr3_ca_drv:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When the DRAM type is DDR3, this parameter defines the phy side CA line
+      (incluing command line, address line and clock line) drive strength.
+    default: 40
+
+  rockchip,phy_ddr3_dq_drv:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When the DRAM type is DDR3, this parameter defines the PHY side DQ line
+      (including DQS/DQ/DM line) drive strength.
+    default: 40
+
+  rockchip,phy_ddr3_odt:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When the DRAM type is DDR3, this parameter defines the PHY side ODT
+      strength.
+    default: 240
+
+  rockchip,lpddr3_odt_dis_freq:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 1000000  # In case anyone thought this was MHz.
+    description:
+      When the DRAM type is LPDDR3, this parameter defines then ODT disable
+      frequency in Hz. When DDR frequency is less then ddr3_odt_dis_freq, the
+      ODT on the DRAM side and controller side are both disabled.
+
+  rockchip,lpddr3_drv:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When the DRAM type is LPDDR3, this parameter defines the DRAM side drive
+      strength in ohms.
+    default: 34
+
+  rockchip,lpddr3_odt:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When the DRAM type is LPDDR3, this parameter defines the DRAM side ODT
+      strength in ohms.
+    default: 240
+
+  rockchip,phy_lpddr3_ca_drv:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When the DRAM type is LPDDR3, this parameter defines the PHY side CA line
+      (including command line, address line and clock line) drive strength.
+    default: 40
+
+  rockchip,phy_lpddr3_dq_drv:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When the DRAM type is LPDDR3, this parameter defines the PHY side DQ line
+      (including DQS/DQ/DM line) drive strength.
+    default: 40
+
+  rockchip,phy_lpddr3_odt:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When dram type is LPDDR3, this parameter define the phy side odt
+      strength, default value is 240.
+
+  rockchip,lpddr4_odt_dis_freq:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 1000000  # In case anyone thought this was MHz.
+    description:
+      When the DRAM type is LPDDR4, this parameter defines the ODT disable
+      frequency in Hz. When the DDR frequency is less then ddr3_odt_dis_freq,
+      the ODT on the DRAM side and controller side are both disabled.
+
+  rockchip,lpddr4_drv:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When the DRAM type is LPDDR4, this parameter defines the DRAM side drive
+      strength in ohms.
+    default: 60
+
+  rockchip,lpddr4_dq_odt:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When the DRAM type is LPDDR4, this parameter defines the DRAM side ODT on
+      DQS/DQ line strength in ohms.
+    default: 40
+
+  rockchip,lpddr4_ca_odt:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When the DRAM type is LPDDR4, this parameter defines the DRAM side ODT on
+      CA line strength in ohms.
+    default: 40
+
+  rockchip,phy_lpddr4_ca_drv:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When the DRAM type is LPDDR4, this parameter defines the PHY side CA line
+      (including command address line) drive strength.
+    default: 40
+
+  rockchip,phy_lpddr4_ck_cs_drv:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When the DRAM type is LPDDR4, this parameter defines the PHY side clock
+      line and CS line drive strength.
+    default: 80
+
+  rockchip,phy_lpddr4_dq_drv:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When the DRAM type is LPDDR4, this parameter defines the PHY side DQ line
+      (including DQS/DQ/DM line) drive strength.
+    default: 80
+
+  rockchip,phy_lpddr4_odt:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When the DRAM type is LPDDR4, this parameter defines the PHY side ODT
+      strength.
+    default: 60
+
+  rockchip,pd-idle-ns:
+    description:
+      Configure the PD_IDLE value in nanoseconds. Defines the power-down idle
+      period in which memories are placed into power-down mode if bus is idle
+      for PD_IDLE nanoseconds.
+
+  rockchip,sr-idle-ns:
+    description:
+      Configure the SR_IDLE value in nanoseconds. Defines the self-refresh idle
+      period in which memories are placed into self-refresh mode if bus is idle
+      for SR_IDLE nanoseconds.
+    default: 0
+
+  rockchip,sr-mc-gate-idle-ns:
+    description:
+      Defines the memory self-refresh and controller clock gating idle period in nanoseconds.
+      Memories are placed into self-refresh mode and memory controller clock
+      arg gating started if bus is idle for sr_mc_gate_idle nanoseconds.
+
+  rockchip,srpd-lite-idle-ns:
+    description:
+      Defines the self-refresh power down idle period in which memories are
+      placed into self-refresh power down mode if bus is idle for
+      srpd_lite_idle nanoseonds. This parameter is for LPDDR4 only.
+
+  rockchip,standby-idle-ns:
+    description:
+      Defines the standby idle period in which memories are placed into
+      self-refresh mode. The controller, pi, PHY and DRAM clock will be gated
+      if bus is idle for standby_idle nanoseconds.
+
+  rockchip,pd-idle-dis-freq-hz:
+    description:
+      Defines the power-down idle disable frequency in Hz. When the DDR
+      frequency is greater than pd-idle-dis-freq, power-down idle is disabled.
+      See also rockchip,pd-idle-ns.
+
+  rockchip,sr-idle-dis-freq-hz:
+    description:
+      Defines the self-refresh idle disable frequency in Hz. When the DDR
+      frequency is greater than sr-idle-dis-freq, self-refresh idle is
+      disabled. See also rockchip,sr-idle-ns.
+
+  rockchip,sr-mc-gate-idle-dis-freq-hz:
+    description:
+      Defines the self-refresh and memory-controller clock gating disable
+      frequency in Hz. When the DDR frequency is greater than
+      sr-mc-gate-idle-dis-freq, the clock will not be gated when idle. See also
+      rockchip,sr-mc-gate-idle-ns.
+
+  rockchip,srpd-lite-idle-dis-freq-hz:
+    description:
+      Defines the self-refresh power down idle disable frequency in Hz. When
+      the DDR frequency is greater than srpd-lite-idle-dis-freq, memory will
+      not be placed into self-refresh power down mode when idle. See also
+      rockchip,srpd-lite-idle-ns.
+
+  rockchip,standby-idle-dis-freq-hz:
+    description:
+      Defines the standby idle disable frequency in Hz. When the DDR frequency
+      is greater than standby-idle-dis-freq, standby idle is disabled. See also
+      rockchip,standby-idle-ns.
+
+required:
+  - compatible
+  - devfreq-events
+  - clocks
+  - clock-names
+  - operating-points-v2
+  - center-supply
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/rk3399-cru.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    memory-controller {
+      compatible = "rockchip,rk3399-dmc";
+      devfreq-events = <&dfi>;
+      rockchip,pmu = <&pmu>;
+      interrupts = <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>;
+      clocks = <&cru SCLK_DDRC>;
+      clock-names = "dmc_clk";
+      operating-points-v2 = <&dmc_opp_table>;
+      center-supply = <&ppvar_centerlogic>;
+      rockchip,pd-idle-ns = <160>;
+      rockchip,sr-idle-ns = <10240>;
+      rockchip,sr-mc-gate-idle-ns = <40960>;
+      rockchip,srpd-lite-idle-ns = <61440>;
+      rockchip,standby-idle-ns = <81920>;
+      rockchip,ddr3_odt_dis_freq = <333000000>;
+      rockchip,lpddr3_odt_dis_freq = <333000000>;
+      rockchip,lpddr4_odt_dis_freq = <333000000>;
+      rockchip,pd-idle-dis-freq-hz = <1000000000>;
+      rockchip,sr-idle-dis-freq-hz = <1000000000>;
+      rockchip,sr-mc-gate-idle-dis-freq-hz = <1000000000>;
+      rockchip,srpd-lite-idle-dis-freq-hz = <0>;
+      rockchip,standby-idle-dis-freq-hz = <928000000>;
+    };
diff --git a/Documentation/devicetree/bindings/mmc/brcm,sdhci-brcmstb.yaml b/Documentation/devicetree/bindings/mmc/brcm,sdhci-brcmstb.yaml
index dccd5ad96981..b672202fff4e 100644
--- a/Documentation/devicetree/bindings/mmc/brcm,sdhci-brcmstb.yaml
+++ b/Documentation/devicetree/bindings/mmc/brcm,sdhci-brcmstb.yaml
@@ -31,7 +31,7 @@ properties:
           - const: brcm,sdhci-brcmstb
 
   reg:
-    minItems: 2
+    maxItems: 2
 
   reg-names:
     items:
@@ -65,15 +65,15 @@ unevaluatedProperties: false
 examples:
   - |
     mmc@84b0000 {
-      sd-uhs-sdr50;
-      sd-uhs-ddr50;
-      sd-uhs-sdr104;
-      sdhci,auto-cmd12;
       compatible = "brcm,bcm7216-sdhci",
                    "brcm,bcm7445-sdhci",
                    "brcm,sdhci-brcmstb";
       reg = <0x84b0000 0x260>, <0x84b0300 0x200>;
       reg-names = "host", "cfg";
+      sd-uhs-sdr50;
+      sd-uhs-ddr50;
+      sd-uhs-sdr104;
+      sdhci,auto-cmd12;
       interrupts = <0x0 0x26 0x4>;
       interrupt-names = "sdio0_0";
       clocks = <&scmi_clk 245>;
@@ -81,6 +81,11 @@ examples:
     };
 
     mmc@84b1000 {
+      compatible = "brcm,bcm7216-sdhci",
+                   "brcm,bcm7445-sdhci",
+                   "brcm,sdhci-brcmstb";
+      reg = <0x84b1000 0x260>, <0x84b1300 0x200>;
+      reg-names = "host", "cfg";
       mmc-ddr-1_8v;
       mmc-hs200-1_8v;
       mmc-hs400-1_8v;
@@ -88,11 +93,6 @@ examples:
       supports-cqe;
       non-removable;
       bus-width = <0x8>;
-      compatible = "brcm,bcm7216-sdhci",
-           "brcm,bcm7445-sdhci",
-            "brcm,sdhci-brcmstb";
-      reg = <0x84b1000 0x260>, <0x84b1300 0x200>;
-      reg-names = "host", "cfg";
       interrupts = <0x0 0x27 0x4>;
       interrupt-names = "sdio1_0";
       clocks = <&scmi_clk 245>;
diff --git a/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.yaml b/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.yaml
index 7dbbcae9485c..29339d0196ec 100644
--- a/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.yaml
+++ b/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.yaml
@@ -34,22 +34,47 @@ properties:
           - fsl,imx6ull-usdhc
           - fsl,imx7d-usdhc
           - fsl,imx7ulp-usdhc
+          - fsl,imx8mm-usdhc
           - fsl,imxrt1050-usdhc
           - nxp,s32g2-usdhc
       - items:
           - enum:
+              - fsl,imx8mq-usdhc
+          - const: fsl,imx7d-usdhc
+      - items:
+          - enum:
+              - fsl,imx8mn-usdhc
+              - fsl,imx8mp-usdhc
+              - fsl,imx93-usdhc
+              - fsl,imx8ulp-usdhc
+          - const: fsl,imx8mm-usdhc
+      - items:
+          - enum:
+              - fsl,imx8qm-usdhc
+          - const: fsl,imx8qxp-usdhc
+      - items:
+          - enum:
+              - fsl,imx8dxl-usdhc
               - fsl,imx8mm-usdhc
               - fsl,imx8mn-usdhc
               - fsl,imx8mp-usdhc
-              - fsl,imx8mq-usdhc
               - fsl,imx8qm-usdhc
               - fsl,imx8qxp-usdhc
           - const: fsl,imx7d-usdhc
+        deprecated: true
       - items:
           - enum:
-              - fsl,imx93-usdhc
-              - fsl,imx8ulp-usdhc
+              - fsl,imx8mn-usdhc
+              - fsl,imx8mp-usdhc
           - const: fsl,imx8mm-usdhc
+          - const: fsl,imx7d-usdhc
+        deprecated: true
+      - items:
+          - enum:
+              - fsl,imx8qm-usdhc
+          - const: fsl,imx8qxp-usdhc
+          - const: fsl,imx7d-usdhc
+        deprecated: true
 
   reg:
     maxItems: 1
diff --git a/Documentation/devicetree/bindings/mmc/marvell,dove-sdhci.yaml b/Documentation/devicetree/bindings/mmc/marvell,dove-sdhci.yaml
new file mode 100644
index 000000000000..7c9c652ad59c
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/marvell,dove-sdhci.yaml
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mmc/marvell,dove-sdhci.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell sdhci-dove controller
+
+maintainers:
+  - Adrian Hunter <adrian.hunter@intel.com>
+  - Ulf Hansson <ulf.hansson@linaro.org>
+
+allOf:
+  - $ref: mmc-controller.yaml#
+
+properties:
+  compatible:
+    const: marvell,dove-sdhci
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    minItems: 1
+    maxItems: 2
+
+  clocks:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    sdio0: mmc@92000 {
+      compatible = "marvell,dove-sdhci";
+      reg = <0x92000 0x100>;
+      interrupts = <35>;
+      clocks = <&gate_clk 9>;
+    };
diff --git a/Documentation/devicetree/bindings/mmc/marvell,orion-sdio.yaml b/Documentation/devicetree/bindings/mmc/marvell,orion-sdio.yaml
new file mode 100644
index 000000000000..8a97ded15aed
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/marvell,orion-sdio.yaml
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mmc/marvell,orion-sdio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell orion-sdio controller
+
+maintainers:
+  - Nicolas Pitre <nico@fluxnic.net>
+  - Ulf Hansson <ulf.hansson@linaro.org>
+
+allOf:
+  - $ref: mmc-controller.yaml#
+
+properties:
+  compatible:
+    const: marvell,orion-sdio
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    mmc@d00d4000 {
+      compatible = "marvell,orion-sdio";
+      reg = <0xd00d4000 0x200>;
+      interrupts = <54>;
+      clocks = <&gateclk 17>;
+    };
diff --git a/Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.txt b/Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.txt
deleted file mode 100644
index c51a62d751dc..000000000000
--- a/Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.txt
+++ /dev/null
@@ -1,173 +0,0 @@
-Marvell Xenon SDHCI Controller device tree bindings
-This file documents differences between the core mmc properties
-described by mmc.txt and the properties used by the Xenon implementation.
-
-Multiple SDHCs might be put into a single Xenon IP, to save size and cost.
-Each SDHC is independent and owns independent resources, such as register sets,
-clock and PHY.
-Each SDHC should have an independent device tree node.
-
-Required Properties:
-- compatible: should be one of the following
-  - "marvell,armada-3700-sdhci": For controllers on Armada-3700 SoC.
-  Must provide a second register area and marvell,pad-type.
-  - "marvell,armada-ap806-sdhci": For controllers on Armada AP806.
-  - "marvell,armada-ap807-sdhci": For controllers on Armada AP807.
-  - "marvell,armada-cp110-sdhci": For controllers on Armada CP110.
-
-- clocks:
-  Array of clocks required for SDHC.
-  Require at least input clock for Xenon IP core. For Armada AP806 and
-  CP110, the AXI clock is also mandatory.
-
-- clock-names:
-  Array of names corresponding to clocks property.
-  The input clock for Xenon IP core should be named as "core".
-  The input clock for the AXI bus must be named as "axi".
-
-- reg:
-  * For "marvell,armada-3700-sdhci", two register areas.
-    The first one for Xenon IP register. The second one for the Armada 3700 SoC
-    PHY PAD Voltage Control register.
-    Please follow the examples with compatible "marvell,armada-3700-sdhci"
-    in below.
-    Please also check property marvell,pad-type in below.
-
-  * For other compatible strings, one register area for Xenon IP.
-
-Optional Properties:
-- marvell,xenon-sdhc-id:
-  Indicate the corresponding bit index of current SDHC in
-  SDHC System Operation Control Register Bit[7:0].
-  Set/clear the corresponding bit to enable/disable current SDHC.
-  If Xenon IP contains only one SDHC, this property is optional.
-
-- marvell,xenon-phy-type:
-  Xenon support multiple types of PHYs.
-  To select eMMC 5.1 PHY, set:
-  marvell,xenon-phy-type = "emmc 5.1 phy"
-  eMMC 5.1 PHY is the default choice if this property is not provided.
-  To select eMMC 5.0 PHY, set:
-  marvell,xenon-phy-type = "emmc 5.0 phy"
-
-  All those types of PHYs can support eMMC, SD and SDIO.
-  Please note that this property only presents the type of PHY.
-  It doesn't stand for the entire SDHC type or property.
-  For example, "emmc 5.1 phy" doesn't mean that this Xenon SDHC only
-  supports eMMC 5.1.
-
-- marvell,xenon-phy-znr:
-  Set PHY ZNR value.
-  Only available for eMMC PHY.
-  Valid range = [0:0x1F].
-  ZNR is set as 0xF by default if this property is not provided.
-
-- marvell,xenon-phy-zpr:
-  Set PHY ZPR value.
-  Only available for eMMC PHY.
-  Valid range = [0:0x1F].
-  ZPR is set as 0xF by default if this property is not provided.
-
-- marvell,xenon-phy-nr-success-tun:
-  Set the number of required consecutive successful sampling points
-  used to identify a valid sampling window, in tuning process.
-  Valid range = [1:7].
-  Set as 0x4 by default if this property is not provided.
-
-- marvell,xenon-phy-tun-step-divider:
-  Set the divider for calculating TUN_STEP.
-  Set as 64 by default if this property is not provided.
-
-- marvell,xenon-phy-slow-mode:
-  If this property is selected, transfers will bypass PHY.
-  Only available when bus frequency lower than 55MHz in SDR mode.
-  Disabled by default. Please only try this property if timing issues
-  always occur with PHY enabled in eMMC HS SDR, SD SDR12, SD SDR25,
-  SD Default Speed and HS mode and eMMC legacy speed mode.
-
-- marvell,xenon-tun-count:
-  Xenon SDHC SoC usually doesn't provide re-tuning counter in
-  Capabilities Register 3 Bit[11:8].
-  This property provides the re-tuning counter.
-  If this property is not set, default re-tuning counter will
-  be set as 0x9 in driver.
-
-- marvell,pad-type:
-  Type of Armada 3700 SoC PHY PAD Voltage Controller register.
-  Only valid when "marvell,armada-3700-sdhci" is selected.
-  Two types: "sd" and "fixed-1-8v".
-  If "sd" is selected, SoC PHY PAD is set as 3.3V at the beginning and is
-  switched to 1.8V when later in higher speed mode.
-  If "fixed-1-8v" is selected, SoC PHY PAD is fixed 1.8V, such as for eMMC.
-  Please follow the examples with compatible "marvell,armada-3700-sdhci"
-  in below.
-
-Example:
-- For eMMC:
-
-	sdhci@aa0000 {
-		compatible = "marvell,armada-ap806-sdhci";
-		reg = <0xaa0000 0x1000>;
-		interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>
-		clocks = <&emmc_clk>,<&axi_clk>;
-		clock-names = "core", "axi";
-		bus-width = <4>;
-		marvell,xenon-phy-slow-mode;
-		marvell,xenon-tun-count = <11>;
-		non-removable;
-		no-sd;
-		no-sdio;
-
-		/* Vmmc and Vqmmc are both fixed */
-	};
-
-- For SD/SDIO:
-
-	sdhci@ab0000 {
-		compatible = "marvell,armada-cp110-sdhci";
-		reg = <0xab0000 0x1000>;
-		interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>
-		vqmmc-supply = <&sd_vqmmc_regulator>;
-		vmmc-supply = <&sd_vmmc_regulator>;
-		clocks = <&sdclk>, <&axi_clk>;
-		clock-names = "core", "axi";
-		bus-width = <4>;
-		marvell,xenon-tun-count = <9>;
-	};
-
-- For eMMC with compatible "marvell,armada-3700-sdhci":
-
-	sdhci@aa0000 {
-		compatible = "marvell,armada-3700-sdhci";
-		reg = <0xaa0000 0x1000>,
-		      <phy_addr 0x4>;
-		interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>
-		clocks = <&emmcclk>;
-		clock-names = "core";
-		bus-width = <8>;
-		mmc-ddr-1_8v;
-		mmc-hs400-1_8v;
-		non-removable;
-		no-sd;
-		no-sdio;
-
-		/* Vmmc and Vqmmc are both fixed */
-
-		marvell,pad-type = "fixed-1-8v";
-	};
-
-- For SD/SDIO with compatible "marvell,armada-3700-sdhci":
-
-	sdhci@ab0000 {
-		compatible = "marvell,armada-3700-sdhci";
-		reg = <0xab0000 0x1000>,
-		      <phy_addr 0x4>;
-		interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>
-		vqmmc-supply = <&sd_regulator>;
-		/* Vmmc is fixed */
-		clocks = <&sdclk>;
-		clock-names = "core";
-		bus-width = <4>;
-
-		marvell,pad-type = "sd";
-	};
diff --git a/Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.yaml b/Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.yaml
new file mode 100644
index 000000000000..c79639e9027e
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.yaml
@@ -0,0 +1,275 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mmc/marvell,xenon-sdhci.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell Xenon SDHCI Controller
+
+description: |
+  This file documents differences between the core MMC properties described by
+  mmc-controller.yaml and the properties used by the Xenon implementation.
+
+  Multiple SDHCs might be put into a single Xenon IP, to save size and cost.
+  Each SDHC is independent and owns independent resources, such as register
+  sets, clock and PHY.
+
+  Each SDHC should have an independent device tree node.
+
+maintainers:
+  - Ulf Hansson <ulf.hansson@linaro.org>
+
+properties:
+  compatible:
+    oneOf:
+      - enum:
+          - marvell,armada-cp110-sdhci
+          - marvell,armada-ap806-sdhci
+
+      - items:
+          - const: marvell,armada-ap807-sdhci
+          - const: marvell,armada-ap806-sdhci
+
+      - items:
+          - const: marvell,armada-3700-sdhci
+          - const: marvell,sdhci-xenon
+
+  reg:
+    minItems: 1
+    maxItems: 2
+    description: |
+      For "marvell,armada-3700-sdhci", two register areas.  The first one
+      for Xenon IP register. The second one for the Armada 3700 SoC PHY PAD
+      Voltage Control register.  Please follow the examples with compatible
+      "marvell,armada-3700-sdhci" in below.
+      Please also check property marvell,pad-type in below.
+
+      For other compatible strings, one register area for Xenon IP.
+
+  clocks:
+    minItems: 1
+    maxItems: 2
+
+  clock-names:
+    minItems: 1
+    items:
+      - const: core
+      - const: axi
+
+  marvell,xenon-sdhc-id:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 0
+    maximum: 7
+    description: |
+      Indicate the corresponding bit index of current SDHC in SDHC System
+      Operation Control Register Bit[7:0].  Set/clear the corresponding bit to
+      enable/disable current SDHC.
+
+  marvell,xenon-phy-type:
+    $ref: /schemas/types.yaml#/definitions/string
+    enum:
+      - "emmc 5.1 phy"
+      - "emmc 5.0 phy"
+    description: |
+      Xenon support multiple types of PHYs. To select eMMC 5.1 PHY, set:
+      marvell,xenon-phy-type = "emmc 5.1 phy" eMMC 5.1 PHY is the default
+      choice if this property is not provided.  To select eMMC 5.0 PHY, set:
+      marvell,xenon-phy-type = "emmc 5.0 phy"
+
+      All those types of PHYs can support eMMC, SD and SDIO. Please note that
+      this property only presents the type of PHY.  It doesn't stand for the
+      entire SDHC type or property.  For example, "emmc 5.1 phy" doesn't mean
+      that this Xenon SDHC only supports eMMC 5.1.
+
+  marvell,xenon-phy-znr:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 0
+    maximum: 0x1f
+    default: 0xf
+    description: |
+      Set PHY ZNR value.
+      Only available for eMMC PHY.
+
+  marvell,xenon-phy-zpr:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 0
+    maximum: 0x1f
+    default: 0xf
+    description: |
+      Set PHY ZPR value.
+      Only available for eMMC PHY.
+
+  marvell,xenon-phy-nr-success-tun:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 1
+    maximum: 7
+    default: 0x4
+    description: |
+      Set the number of required consecutive successful sampling points
+      used to identify a valid sampling window, in tuning process.
+
+  marvell,xenon-phy-tun-step-divider:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    default: 64
+    description: |
+      Set the divider for calculating TUN_STEP.
+
+  marvell,xenon-phy-slow-mode:
+    type: boolean
+    description: |
+      If this property is selected, transfers will bypass PHY.
+      Only available when bus frequency lower than 55MHz in SDR mode.
+      Disabled by default. Please only try this property if timing issues
+      always occur with PHY enabled in eMMC HS SDR, SD SDR12, SD SDR25,
+      SD Default Speed and HS mode and eMMC legacy speed mode.
+
+  marvell,xenon-tun-count:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    default: 0x9
+    description: |
+      Xenon SDHC SoC usually doesn't provide re-tuning counter in
+      Capabilities Register 3 Bit[11:8].
+      This property provides the re-tuning counter.
+
+allOf:
+  - $ref: mmc-controller.yaml#
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: marvell,armada-3700-sdhci
+
+    then:
+      properties:
+        reg:
+          items:
+            - description: Xenon IP registers
+            - description: Armada 3700 SoC PHY PAD Voltage Control register
+          minItems: 2
+
+        marvell,pad-type:
+          $ref: /schemas/types.yaml#/definitions/string
+          enum:
+            - sd
+            - fixed-1-8v
+          description: |
+            Type of Armada 3700 SoC PHY PAD Voltage Controller register.
+            If "sd" is selected, SoC PHY PAD is set as 3.3V at the beginning
+            and is switched to 1.8V when later in higher speed mode.
+            If "fixed-1-8v" is selected, SoC PHY PAD is fixed 1.8V, such as for
+            eMMC.
+            Please follow the examples with compatible
+            "marvell,armada-3700-sdhci" in below.
+
+      required:
+        - marvell,pad-type
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - marvell,armada-cp110-sdhci
+              - marvell,armada-ap807-sdhci
+              - marvell,armada-ap806-sdhci
+
+    then:
+      properties:
+        clocks:
+          minItems: 2
+
+        clock-names:
+          items:
+            - const: core
+            - const: axi
+
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    // For eMMC
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    mmc@aa0000 {
+      compatible = "marvell,armada-ap807-sdhci", "marvell,armada-ap806-sdhci";
+      reg = <0xaa0000 0x1000>;
+      interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>;
+      clocks = <&emmc_clk 0>, <&axi_clk 0>;
+      clock-names = "core", "axi";
+      bus-width = <4>;
+      marvell,xenon-phy-slow-mode;
+      marvell,xenon-tun-count = <11>;
+      non-removable;
+      no-sd;
+      no-sdio;
+
+      /* Vmmc and Vqmmc are both fixed */
+    };
+
+  - |
+    // For SD/SDIO
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    mmc@ab0000 {
+      compatible = "marvell,armada-cp110-sdhci";
+      reg = <0xab0000 0x1000>;
+      interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
+      vqmmc-supply = <&sd_vqmmc_regulator>;
+      vmmc-supply = <&sd_vmmc_regulator>;
+      clocks = <&sdclk 0>, <&axi_clk 0>;
+      clock-names = "core", "axi";
+      bus-width = <4>;
+      marvell,xenon-tun-count = <9>;
+    };
+
+  - |
+    // For eMMC with compatible "marvell,armada-3700-sdhci":
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    mmc@aa0000 {
+      compatible = "marvell,armada-3700-sdhci", "marvell,sdhci-xenon";
+      reg = <0xaa0000 0x1000>,
+            <0x17808 0x4>;
+      interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>;
+      clocks = <&emmcclk 0>;
+      clock-names = "core";
+      bus-width = <8>;
+      mmc-ddr-1_8v;
+      mmc-hs400-1_8v;
+      non-removable;
+      no-sd;
+      no-sdio;
+
+      /* Vmmc and Vqmmc are both fixed */
+
+      marvell,pad-type = "fixed-1-8v";
+    };
+
+  - |
+    // For SD/SDIO with compatible "marvell,armada-3700-sdhci":
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    mmc@ab0000 {
+      compatible = "marvell,armada-3700-sdhci", "marvell,sdhci-xenon";
+      reg = <0xab0000 0x1000>,
+            <0x17808 0x4>;
+      interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
+      vqmmc-supply = <&sd_regulator>;
+      /* Vmmc is fixed */
+      clocks = <&sdclk 0>;
+      clock-names = "core";
+      bus-width = <4>;
+
+      marvell,pad-type = "sd";
+    };
diff --git a/Documentation/devicetree/bindings/mmc/mmc-controller.yaml b/Documentation/devicetree/bindings/mmc/mmc-controller.yaml
index 513f3c8758aa..ff5ce89e5111 100644
--- a/Documentation/devicetree/bindings/mmc/mmc-controller.yaml
+++ b/Documentation/devicetree/bindings/mmc/mmc-controller.yaml
@@ -298,7 +298,10 @@ properties:
 
   vqmmc-supply:
     description:
-      Supply for the bus IO line power
+      Supply for the bus IO line power, such as a level shifter.
+      If the level shifter is controlled by a GPIO line, this shall
+      be modeled as a "regulator-fixed" with a GPIO line for
+      switching the level shifter on/off.
 
   mmc-pwrseq:
     $ref: /schemas/types.yaml#/definitions/phandle
diff --git a/Documentation/devicetree/bindings/mmc/mtk-sd.yaml b/Documentation/devicetree/bindings/mmc/mtk-sd.yaml
index 297ada03e3de..2a2e9fa8c188 100644
--- a/Documentation/devicetree/bindings/mmc/mtk-sd.yaml
+++ b/Documentation/devicetree/bindings/mmc/mtk-sd.yaml
@@ -40,7 +40,10 @@ properties:
           - const: mediatek,mt8183-mmc
 
   reg:
-    maxItems: 1
+    minItems: 1
+    items:
+      - description: base register (required).
+      - description: top base register (required for MT8183).
 
   clocks:
     description:
@@ -168,6 +171,16 @@ required:
   - vmmc-supply
   - vqmmc-supply
 
+if:
+  properties:
+    compatible:
+      contains:
+        const: mediatek,mt8183-mmc
+then:
+  properties:
+    reg:
+      minItems: 2
+
 unevaluatedProperties: false
 
 examples:
diff --git a/Documentation/devicetree/bindings/mmc/orion-sdio.txt b/Documentation/devicetree/bindings/mmc/orion-sdio.txt
deleted file mode 100644
index 10f0818a34c5..000000000000
--- a/Documentation/devicetree/bindings/mmc/orion-sdio.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-* Marvell orion-sdio controller
-
-This file documents differences between the core properties in mmc.txt
-and the properties used by the orion-sdio driver.
-
-- compatible: Should be "marvell,orion-sdio"
-- clocks: reference to the clock of the SDIO interface
-
-Example:
-
-	mvsdio@d00d4000 {
-		compatible = "marvell,orion-sdio";
-		reg = <0xd00d4000 0x200>;
-		interrupts = <54>;
-		clocks = <&gateclk 17>;
-	};
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-am654.yaml b/Documentation/devicetree/bindings/mmc/sdhci-am654.yaml
index 0566493c4def..0ab07759b472 100644
--- a/Documentation/devicetree/bindings/mmc/sdhci-am654.yaml
+++ b/Documentation/devicetree/bindings/mmc/sdhci-am654.yaml
@@ -186,6 +186,13 @@ properties:
     description: Clock Delay Buffer Select
     $ref: "/schemas/types.yaml#/definitions/uint32"
 
+  ti,fails-without-test-cd:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      When present, indicates that the CD line is not connected
+      and the controller is required to be forced into Test mode
+      to set the TESTCD bit.
+
 required:
   - compatible
   - reg
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-dove.txt b/Documentation/devicetree/bindings/mmc/sdhci-dove.txt
deleted file mode 100644
index ae9aab9abcd7..000000000000
--- a/Documentation/devicetree/bindings/mmc/sdhci-dove.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-* Marvell sdhci-dove controller
-
-This file documents differences between the core properties in mmc.txt
-and the properties used by the sdhci-pxav2 and sdhci-pxav3 drivers.
-
-- compatible: Should be "marvell,dove-sdhci".
-
-Example:
-
-sdio0: sdio@92000 {
-	compatible = "marvell,dove-sdhci";
-	reg = <0x92000 0x100>;
-	interrupts = <35>;
-};
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-msm.txt b/Documentation/devicetree/bindings/mmc/sdhci-msm.txt
deleted file mode 100644
index 6216ed777343..000000000000
--- a/Documentation/devicetree/bindings/mmc/sdhci-msm.txt
+++ /dev/null
@@ -1,123 +0,0 @@
-* Qualcomm SDHCI controller (sdhci-msm)
-
-This file documents differences between the core properties in mmc.txt
-and the properties used by the sdhci-msm driver.
-
-Required properties:
-- compatible: Should contain a SoC-specific string and a IP version string:
-	version strings:
-		"qcom,sdhci-msm-v4" for sdcc versions less than 5.0
-		"qcom,sdhci-msm-v5" for sdcc version 5.0
-		For SDCC version 5.0.0, MCI registers are removed from SDCC
-		interface and some registers are moved to HC. New compatible
-		string is added to support this change - "qcom,sdhci-msm-v5".
-	full compatible strings with SoC and version:
-		"qcom,apq8084-sdhci", "qcom,sdhci-msm-v4"
-		"qcom,msm8226-sdhci", "qcom,sdhci-msm-v4"
-		"qcom,msm8953-sdhci", "qcom,sdhci-msm-v4"
-		"qcom,msm8974-sdhci", "qcom,sdhci-msm-v4"
-		"qcom,msm8916-sdhci", "qcom,sdhci-msm-v4"
-		"qcom,msm8992-sdhci", "qcom,sdhci-msm-v4"
-		"qcom,msm8994-sdhci", "qcom,sdhci-msm-v4"
-		"qcom,msm8996-sdhci", "qcom,sdhci-msm-v4"
-		"qcom,qcs404-sdhci", "qcom,sdhci-msm-v5"
-		"qcom,sc7180-sdhci", "qcom,sdhci-msm-v5";
-		"qcom,sc7280-sdhci", "qcom,sdhci-msm-v5";
-		"qcom,sdm845-sdhci", "qcom,sdhci-msm-v5"
-		"qcom,sdx55-sdhci", "qcom,sdhci-msm-v5";
-		"qcom,sm8250-sdhci", "qcom,sdhci-msm-v5"
-	NOTE that some old device tree files may be floating around that only
-	have the string "qcom,sdhci-msm-v4" without the SoC compatible string
-	but doing that should be considered a deprecated practice.
-
-- reg: Base address and length of the register in the following order:
-	- Host controller register map (required)
-	- SD Core register map (required for controllers earlier than msm-v5)
-	- CQE register map (Optional, CQE support is present on SDHC instance meant
-	                    for eMMC and version v4.2 and above)
-	- Inline Crypto Engine register map (optional)
-- reg-names: When CQE register map is supplied, below reg-names are required
-	- "hc" for Host controller register map
-	- "core" for SD core register map
-	- "cqhci" for CQE register map
-	- "ice" for Inline Crypto Engine register map (optional)
-- interrupts: Should contain an interrupt-specifiers for the interrupts:
-	- Host controller interrupt (required)
-- pinctrl-names: Should contain only one value - "default".
-- pinctrl-0: Should specify pin control groups used for this controller.
-- clocks: A list of phandle + clock-specifier pairs for the clocks listed in clock-names.
-- clock-names: Should contain the following:
-	"iface" - Main peripheral bus clock (PCLK/HCLK - AHB Bus clock) (required)
-	"core"	- SDC MMC clock (MCLK) (required)
-	"bus"	- SDCC bus voter clock (optional)
-	"xo"	- TCXO clock (optional)
-	"cal"	- reference clock for RCLK delay calibration (optional)
-	"sleep"	- sleep clock for RCLK delay calibration (optional)
-	"ice" - clock for Inline Crypto Engine (optional)
-
-- qcom,ddr-config: Certain chipsets and platforms require particular settings
-	for the DDR_CONFIG register. Use this field to specify the register
-	value as per the Hardware Programming Guide.
-
-- qcom,dll-config: Chipset and Platform specific value. Use this field to
-	specify the DLL_CONFIG register value as per Hardware Programming Guide.
-
-Optional Properties:
-* Following bus parameters are required for interconnect bandwidth scaling:
-- interconnects: Pairs of phandles and interconnect provider specifier
-		 to denote the edge source and destination ports of
-		 the interconnect path.
-
-- interconnect-names: For sdhc, we have two main paths.
-		1. Data path : sdhc to ddr
-		2. Config path : cpu to sdhc
-		For Data interconnect path the name supposed to be
-		is "sdhc-ddr" and for config interconnect path it is
-		"cpu-sdhc".
-		Please refer to Documentation/devicetree/bindings/
-		interconnect/ for more details.
-
-Example:
-
-	sdhc_1: sdhci@f9824900 {
-		compatible = "qcom,msm8974-sdhci", "qcom,sdhci-msm-v4";
-		reg = <0xf9824900 0x11c>, <0xf9824000 0x800>;
-		interrupts = <0 123 0>;
-		bus-width = <8>;
-		non-removable;
-
-		vmmc-supply = <&pm8941_l20>;
-		vqmmc-supply = <&pm8941_s3>;
-
-		pinctrl-names = "default";
-		pinctrl-0 = <&sdc1_clk &sdc1_cmd &sdc1_data>;
-
-		clocks = <&gcc GCC_SDCC1_APPS_CLK>, <&gcc GCC_SDCC1_AHB_CLK>;
-		clock-names = "core", "iface";
-		interconnects = <&qnoc MASTER_SDCC_ID &qnoc SLAVE_DDR_ID>,
-				<&qnoc MASTER_CPU_ID &qnoc SLAVE_SDCC_ID>;
-		interconnect-names = "sdhc-ddr","cpu-sdhc";
-
-		qcom,dll-config = <0x000f642c>;
-		qcom,ddr-config = <0x80040868>;
-	};
-
-	sdhc_2: sdhci@f98a4900 {
-		compatible = "qcom,msm8974-sdhci", "qcom,sdhci-msm-v4";
-		reg = <0xf98a4900 0x11c>, <0xf98a4000 0x800>;
-		interrupts = <0 125 0>;
-		bus-width = <4>;
-		cd-gpios = <&msmgpio 62 0x1>;
-
-		vmmc-supply = <&pm8941_l21>;
-		vqmmc-supply = <&pm8941_l13>;
-
-		pinctrl-names = "default";
-		pinctrl-0 = <&sdc2_clk &sdc2_cmd &sdc2_data>;
-
-		clocks = <&gcc GCC_SDCC2_APPS_CLK>, <&gcc GCC_SDCC2_AHB_CLK>;
-		clock-names = "core", "iface";
-
-		qcom,dll-config = <0x0007642c>;
-		qcom,ddr-config = <0x80040868>;
-	};
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-msm.yaml b/Documentation/devicetree/bindings/mmc/sdhci-msm.yaml
new file mode 100644
index 000000000000..e4236334e748
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/sdhci-msm.yaml
@@ -0,0 +1,194 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/mmc/sdhci-msm.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Qualcomm SDHCI controller (sdhci-msm)
+
+maintainers:
+  - Bhupesh Sharma <bhupesh.sharma@linaro.org>
+
+description:
+  Secure Digital Host Controller Interface (SDHCI) present on
+  Qualcomm SOCs supports SD/MMC/SDIO devices.
+
+properties:
+  compatible:
+    oneOf:
+      - items:
+          - enum:
+              - qcom,apq8084-sdhci
+              - qcom,msm8226-sdhci
+              - qcom,msm8953-sdhci
+              - qcom,msm8974-sdhci
+              - qcom,msm8916-sdhci
+              - qcom,msm8992-sdhci
+              - qcom,msm8994-sdhci
+              - qcom,msm8996-sdhci
+              - qcom,qcs404-sdhci
+              - qcom,sc7180-sdhci
+              - qcom,sc7280-sdhci
+              - qcom,sdm630-sdhci
+              - qcom,sdm845-sdhci
+              - qcom,sdx55-sdhci
+              - qcom,sdx65-sdhci
+              - qcom,sm6125-sdhci
+              - qcom,sm6350-sdhci
+              - qcom,sm8150-sdhci
+              - qcom,sm8250-sdhci
+          - enum:
+              - qcom,sdhci-msm-v4 # for sdcc versions less than 5.0
+              - qcom,sdhci-msm-v5 # for sdcc version 5.0
+      - items:
+          - const: qcom,sdhci-msm-v4 # Deprecated (only for backward compatibility)
+                                     # for sdcc versions less than 5.0
+
+  reg:
+    minItems: 1
+    items:
+      - description: Host controller register map
+      - description: SD Core register map
+      - description: CQE register map
+      - description: Inline Crypto Engine register map
+
+  clocks:
+    minItems: 3
+    items:
+      - description: Main peripheral bus clock, PCLK/HCLK - AHB Bus clock
+      - description: SDC MMC clock, MCLK
+      - description: TCXO clock
+      - description: clock for Inline Crypto Engine
+      - description: SDCC bus voter clock
+      - description: reference clock for RCLK delay calibration
+      - description: sleep clock for RCLK delay calibration
+
+  clock-names:
+    minItems: 2
+    items:
+      - const: iface
+      - const: core
+      - const: xo
+      - const: ice
+      - const: bus
+      - const: cal
+      - const: sleep
+
+  interrupts:
+    maxItems: 2
+
+  interrupt-names:
+    items:
+      - const: hc_irq
+      - const: pwr_irq
+
+  pinctrl-names:
+    minItems: 1
+    items:
+      - const: default
+      - const: sleep
+
+  pinctrl-0:
+    description:
+      Should specify pin control groups used for this controller.
+
+  qcom,ddr-config:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: platform specific settings for DDR_CONFIG reg.
+
+  qcom,dll-config:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: platform specific settings for DLL_CONFIG reg.
+
+  iommus:
+    minItems: 1
+    maxItems: 8
+    description: |
+      phandle to apps_smmu node with sid mask.
+
+  interconnects:
+    items:
+      - description: data path, sdhc to ddr
+      - description: config path, cpu to sdhc
+
+  interconnect-names:
+    items:
+      - const: sdhc-ddr
+      - const: cpu-sdhc
+
+  power-domains:
+    description: A phandle to sdhci power domain node
+    maxItems: 1
+
+patternProperties:
+  '^opp-table(-[a-z0-9]+)?$':
+    if:
+      properties:
+        compatible:
+          const: operating-points-v2
+    then:
+      patternProperties:
+        '^opp-?[0-9]+$':
+          required:
+            - required-opps
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - interrupts
+
+additionalProperties: true
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/qcom,gcc-sm8250.h>
+    #include <dt-bindings/clock/qcom,rpmh.h>
+    #include <dt-bindings/power/qcom-rpmpd.h>
+
+    sdhc_2: sdhci@8804000 {
+      compatible = "qcom,sm8250-sdhci", "qcom,sdhci-msm-v5";
+      reg = <0 0x08804000 0 0x1000>;
+
+      interrupts = <GIC_SPI 204 IRQ_TYPE_LEVEL_HIGH>,
+                   <GIC_SPI 222 IRQ_TYPE_LEVEL_HIGH>;
+      interrupt-names = "hc_irq", "pwr_irq";
+
+      clocks = <&gcc GCC_SDCC2_AHB_CLK>,
+               <&gcc GCC_SDCC2_APPS_CLK>,
+               <&rpmhcc RPMH_CXO_CLK>;
+      clock-names = "iface", "core", "xo";
+      iommus = <&apps_smmu 0x4a0 0x0>;
+      qcom,dll-config = <0x0007642c>;
+      qcom,ddr-config = <0x80040868>;
+      power-domains = <&rpmhpd SM8250_CX>;
+
+      operating-points-v2 = <&sdhc2_opp_table>;
+
+      sdhc2_opp_table: opp-table {
+        compatible = "operating-points-v2";
+
+        opp-19200000 {
+          opp-hz = /bits/ 64 <19200000>;
+          required-opps = <&rpmhpd_opp_min_svs>;
+        };
+
+        opp-50000000 {
+          opp-hz = /bits/ 64 <50000000>;
+          required-opps = <&rpmhpd_opp_low_svs>;
+        };
+
+        opp-100000000 {
+          opp-hz = /bits/ 64 <100000000>;
+          required-opps = <&rpmhpd_opp_svs>;
+        };
+
+        opp-202000000 {
+          opp-hz = /bits/ 64 <202000000>;
+          required-opps = <&rpmhpd_opp_svs_l1>;
+        };
+      };
+    };
diff --git a/Documentation/devicetree/bindings/mmc/snps,dwcmshc-sdhci.yaml b/Documentation/devicetree/bindings/mmc/snps,dwcmshc-sdhci.yaml
index f300ced4cdf3..71f8e726d641 100644
--- a/Documentation/devicetree/bindings/mmc/snps,dwcmshc-sdhci.yaml
+++ b/Documentation/devicetree/bindings/mmc/snps,dwcmshc-sdhci.yaml
@@ -17,6 +17,7 @@ properties:
   compatible:
     enum:
       - rockchip,rk3568-dwcmshc
+      - rockchip,rk3588-dwcmshc
       - snps,dwcmshc-sdhci
 
   reg:
diff --git a/Documentation/devicetree/bindings/mtd/aspeed-smc.txt b/Documentation/devicetree/bindings/mtd/aspeed-smc.txt
deleted file mode 100644
index 49f6528ef547..000000000000
--- a/Documentation/devicetree/bindings/mtd/aspeed-smc.txt
+++ /dev/null
@@ -1,51 +0,0 @@
-* Aspeed Firmware Memory controller
-* Aspeed SPI Flash Memory Controller
-
-The Firmware Memory Controller in the Aspeed AST2500 SoC supports
-three chip selects, two of which are always of SPI type and the third
-can be SPI or NOR type flash. These bindings only describe SPI.
-
-The two SPI flash memory controllers in the AST2500 each support two
-chip selects.
-
-Required properties:
-  - compatible : Should be one of
-	"aspeed,ast2400-fmc" for the AST2400 Firmware Memory Controller
-	"aspeed,ast2400-spi" for the AST2400 SPI Flash memory Controller
-	"aspeed,ast2500-fmc" for the AST2500 Firmware Memory Controller
-	"aspeed,ast2500-spi" for the AST2500 SPI flash memory controllers
-
-  - reg : the first contains the control register location and length,
-          the second contains the memory window mapping address and length
-  - #address-cells : must be 1 corresponding to chip select child binding
-  - #size-cells : must be 0 corresponding to chip select child binding
-
-Optional properties:
-  - interrupts : Should contain the interrupt for the dma device if an
-    FMC
-
-The child nodes are the SPI flash modules which must have a compatible
-property as specified in bindings/mtd/jedec,spi-nor.txt
-
-Optionally, the child node can contain properties for SPI mode (may be
-ignored):
-  - spi-max-frequency - max frequency of spi bus
-
-
-Example:
-fmc: fmc@1e620000 {
-	compatible = "aspeed,ast2500-fmc";
-	reg = < 0x1e620000 0x94
-		0x20000000 0x02000000 >;
-	#address-cells = <1>;
-	#size-cells = <0>;
-	interrupts = <19>;
-	flash@0 {
-		reg = < 0 >;
-		compatible = "jedec,spi-nor";
-		/* spi-max-frequency = <>; */
-		/* m25p,fast-read; */
-		#address-cells = <1>;
-		#size-cells = <1>;
-	};
-};
diff --git a/Documentation/devicetree/bindings/mtd/elm.txt b/Documentation/devicetree/bindings/mtd/elm.txt
deleted file mode 100644
index 59ddc61c1076..000000000000
--- a/Documentation/devicetree/bindings/mtd/elm.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-Error location module
-
-Required properties:
-- compatible: Must be "ti,am3352-elm"
-- reg: physical base address and size of the registers map.
-- interrupts: Interrupt number for the elm.
-
-Optional properties:
-- ti,hwmods: Name of the hwmod associated to the elm
-
-Example:
-elm: elm@0 {
-	compatible = "ti,am3352-elm";
-	reg = <0x48080000 0x2000>;
-	interrupts = <4>;
-};
diff --git a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml
index 4abfb4cfc157..7149784a36ac 100644
--- a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml
+++ b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml
@@ -50,10 +50,6 @@ properties:
     minItems: 1
     maxItems: 2
 
-  spi-max-frequency: true
-  spi-rx-bus-width: true
-  spi-tx-bus-width: true
-
   m25p,fast-read:
     type: boolean
     description:
@@ -74,8 +70,6 @@ properties:
       be used on such systems, to denote the absence of a reliable reset
       mechanism.
 
-  label: true
-
   partitions:
     type: object
 
@@ -99,8 +93,6 @@ examples:
         #size-cells = <0>;
 
         flash@0 {
-            #address-cells = <1>;
-            #size-cells = <1>;
             compatible = "spansion,m25p80", "jedec,spi-nor";
             reg = <0>;
             spi-max-frequency = <40000000>;
diff --git a/Documentation/devicetree/bindings/mtd/partitions/fixed-partitions.yaml b/Documentation/devicetree/bindings/mtd/partitions/fixed-partitions.yaml
index ea4cace6a955..ad3ccd250802 100644
--- a/Documentation/devicetree/bindings/mtd/partitions/fixed-partitions.yaml
+++ b/Documentation/devicetree/bindings/mtd/partitions/fixed-partitions.yaml
@@ -19,7 +19,11 @@ maintainers:
 
 properties:
   compatible:
-    const: fixed-partitions
+    oneOf:
+      - const: fixed-partitions
+      - items:
+          - const: sercomm,sc-partitions
+          - const: fixed-partitions
 
   "#address-cells": true
 
@@ -27,7 +31,24 @@ properties:
 
 patternProperties:
   "@[0-9a-f]+$":
-    $ref: "partition.yaml#"
+    allOf:
+      - $ref: "partition.yaml#"
+      - if:
+          properties:
+            compatible:
+              contains:
+                const: sercomm,sc-partitions
+        then:
+          properties:
+            sercomm,scpart-id:
+              description: Partition id in Sercomm partition map. Mtd
+                parser uses this id to find a record in the partition map
+                containing offset and size of the current partition. The
+                values from partition map overrides partition offset and
+                size defined in reg property of the dts. Frequently these
+                values are the same, but may differ if device has bad
+                eraseblocks on a flash.
+              $ref: /schemas/types.yaml#/definitions/uint32
 
 required:
   - "#address-cells"
@@ -52,6 +73,7 @@ examples:
             reg = <0x0100000 0x200000>;
         };
     };
+
   - |
     partitions {
         compatible = "fixed-partitions";
@@ -64,6 +86,7 @@ examples:
             reg = <0x00000000 0x1 0x00000000>;
         };
     };
+
   - |
     partitions {
         compatible = "fixed-partitions";
@@ -82,6 +105,7 @@ examples:
             reg = <0x2 0x00000000 0x1 0x00000000>;
         };
     };
+
   - |
     partitions {
         compatible = "fixed-partitions";
@@ -119,3 +143,30 @@ examples:
             };
         };
     };
+
+  - |
+    partitions {
+        compatible = "sercomm,sc-partitions", "fixed-partitions";
+        #address-cells = <1>;
+        #size-cells = <1>;
+
+        partition@0 {
+            label = "u-boot";
+            reg = <0x0 0x100000>;
+            sercomm,scpart-id = <0>;
+            read-only;
+        };
+
+        partition@100000 {
+            label = "dynamic partition map";
+            reg = <0x100000 0x100000>;
+            sercomm,scpart-id = <1>;
+        };
+
+        partition@200000 {
+            label = "Factory";
+            reg = <0x200000 0x100000>;
+            sercomm,scpart-id = <2>;
+            read-only;
+        };
+    };
diff --git a/Documentation/devicetree/bindings/mtd/renesas-nandc.yaml b/Documentation/devicetree/bindings/mtd/renesas-nandc.yaml
index 2870d36361c4..7b18bc5cc8b3 100644
--- a/Documentation/devicetree/bindings/mtd/renesas-nandc.yaml
+++ b/Documentation/devicetree/bindings/mtd/renesas-nandc.yaml
@@ -36,11 +36,15 @@ properties:
       - const: hclk
       - const: eclk
 
+  power-domains:
+    maxItems: 1
+
 required:
   - compatible
   - reg
   - clocks
   - clock-names
+  - power-domains
   - interrupts
 
 unevaluatedProperties: false
@@ -56,6 +60,7 @@ examples:
         interrupts = <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>;
         clocks = <&sysctrl R9A06G032_HCLK_NAND>, <&sysctrl R9A06G032_CLK_NAND>;
         clock-names = "hclk", "eclk";
+        power-domains = <&sysctrl>;
         #address-cells = <1>;
         #size-cells = <0>;
     };
diff --git a/Documentation/devicetree/bindings/mtd/ti,elm.yaml b/Documentation/devicetree/bindings/mtd/ti,elm.yaml
new file mode 100644
index 000000000000..87128c004596
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/ti,elm.yaml
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mtd/ti,elm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Texas Instruments Error Location Module (ELM).
+
+maintainers:
+  - Roger Quadros <rogerq@kernel.org>
+
+description:
+  ELM module is used together with GPMC and NAND Flash to detect
+  errors and the location of the error based on BCH algorithms
+  so they can be corrected if possible.
+
+properties:
+  compatible:
+    enum:
+      - ti,am3352-elm
+      - ti,am64-elm
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+    description: Functional clock.
+
+  clock-names:
+    items:
+      - const: fck
+
+  power-domains:
+    maxItems: 1
+
+  ti,hwmods:
+    description:
+      Name of the HWMOD associated with ELM. This is for legacy
+      platforms only.
+    $ref: /schemas/types.yaml#/definitions/string
+    deprecated: true
+
+required:
+  - compatible
+  - reg
+  - interrupts
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: ti,am64-elm
+    then:
+      required:
+        - clocks
+        - clock-names
+        - power-domains
+
+additionalProperties: false
+
+examples:
+  - |
+    elm: ecc@0 {
+        compatible = "ti,am3352-elm";
+        reg = <0x0 0x2000>;
+        interrupts = <4>;
+    };
diff --git a/Documentation/devicetree/bindings/perf/arm,cmn.yaml b/Documentation/devicetree/bindings/perf/arm,cmn.yaml
index 2d4219ec7eda..2e51072e794a 100644
--- a/Documentation/devicetree/bindings/perf/arm,cmn.yaml
+++ b/Documentation/devicetree/bindings/perf/arm,cmn.yaml
@@ -14,6 +14,8 @@ properties:
   compatible:
     enum:
       - arm,cmn-600
+      - arm,cmn-650
+      - arm,cmn-700
       - arm,ci-700
 
   reg:
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/l2cache.txt b/Documentation/devicetree/bindings/powerpc/fsl/l2cache.txt
index 8a70696395a7..22ad012660e9 100644
--- a/Documentation/devicetree/bindings/powerpc/fsl/l2cache.txt
+++ b/Documentation/devicetree/bindings/powerpc/fsl/l2cache.txt
@@ -6,12 +6,6 @@ The cache bindings explained below are Devicetree Specification compliant
 Required Properties:
 
 - compatible	: Should include one of the following:
-		  "fsl,8540-l2-cache-controller"
-		  "fsl,8541-l2-cache-controller"
-		  "fsl,8544-l2-cache-controller"
-		  "fsl,8548-l2-cache-controller"
-		  "fsl,8555-l2-cache-controller"
-		  "fsl,8568-l2-cache-controller"
 		  "fsl,b4420-l2-cache-controller"
 		  "fsl,b4860-l2-cache-controller"
 		  "fsl,bsc9131-l2-cache-controller"
diff --git a/Documentation/devicetree/bindings/regulator/mt6315-regulator.yaml b/Documentation/devicetree/bindings/regulator/mt6315-regulator.yaml
index 61dd5af80db6..5d2d989de893 100644
--- a/Documentation/devicetree/bindings/regulator/mt6315-regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/mt6315-regulator.yaml
@@ -31,7 +31,7 @@ properties:
         $ref: "regulator.yaml#"
 
         properties:
-          regulator-name:
+          regulator-compatible:
             pattern: "^vbuck[1-4]$"
 
     additionalProperties: false
diff --git a/Documentation/devicetree/bindings/regulator/mt6358-regulator.txt b/Documentation/devicetree/bindings/regulator/mt6358-regulator.txt
index 9a90a92f2d7e..7034cdca54e0 100644
--- a/Documentation/devicetree/bindings/regulator/mt6358-regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/mt6358-regulator.txt
@@ -8,14 +8,14 @@ Documentation/devicetree/bindings/regulator/regulator.txt.
 
 The valid names for regulators are::
 BUCK:
-  buck_vdram1, buck_vcore, buck_vpa, buck_vproc11, buck_vproc12, buck_vgpu,
-  buck_vs2, buck_vmodem, buck_vs1
+  buck_vdram1, buck_vcore, buck_vcore_sshub, buck_vpa, buck_vproc11,
+  buck_vproc12, buck_vgpu, buck_vs2, buck_vmodem, buck_vs1
 LDO:
   ldo_vdram2, ldo_vsim1, ldo_vibr, ldo_vrf12, ldo_vio18, ldo_vusb, ldo_vcamio,
   ldo_vcamd, ldo_vcn18, ldo_vfe28, ldo_vsram_proc11, ldo_vcn28, ldo_vsram_others,
-  ldo_vsram_gpu, ldo_vxo22, ldo_vefuse, ldo_vaux18, ldo_vmch, ldo_vbif28,
-  ldo_vsram_proc12, ldo_vcama1, ldo_vemc, ldo_vio28, ldo_va12, ldo_vrf18,
-  ldo_vcn33_bt, ldo_vcn33_wifi, ldo_vcama2, ldo_vmc, ldo_vldo28, ldo_vaud28,
+  ldo_vsram_others_sshub, ldo_vsram_gpu, ldo_vxo22, ldo_vefuse, ldo_vaux18,
+  ldo_vmch, ldo_vbif28, ldo_vsram_proc12, ldo_vcama1, ldo_vemc, ldo_vio28, ldo_va12,
+  ldo_vrf18, ldo_vcn33_bt, ldo_vcn33_wifi, ldo_vcama2, ldo_vmc, ldo_vldo28, ldo_vaud28,
   ldo_vsim2
 
 Example:
@@ -354,5 +354,17 @@ Example:
 				regulator-max-microvolt = <3100000>;
 				regulator-enable-ramp-delay = <540>;
 			};
+
+			mt6358_vcore_sshub_reg: buck_vcore_sshub {
+				regulator-name = "vcore_sshub";
+				regulator-min-microvolt = <500000>;
+				regulator-max-microvolt = <1293750>;
+			};
+
+			mt6358_vsram_others_sshub_reg: ldo_vsram_others_sshub {
+				regulator-name = "vsram_others_sshub";
+				regulator-min-microvolt = <500000>;
+				regulator-max-microvolt = <1293750>;
+			};
 		};
 	};
diff --git a/Documentation/devicetree/bindings/regulator/nxp,pca9450-regulator.yaml b/Documentation/devicetree/bindings/regulator/nxp,pca9450-regulator.yaml
index f70f2e758a00..b539781e39aa 100644
--- a/Documentation/devicetree/bindings/regulator/nxp,pca9450-regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/nxp,pca9450-regulator.yaml
@@ -92,6 +92,17 @@ properties:
       LDO5CTRL_L or LDO5CTRL_H register. Use this if the SD_VSEL signal is
       connected to a host GPIO.
 
+  nxp,i2c-lt-enable:
+    type: boolean
+    description:
+      Indicates that the I2C Level Translator is used.
+
+  nxp,wdog_b-warm-reset:
+    type: boolean
+    description:
+      When WDOG_B signal is asserted a warm reset will be done instead of cold
+      reset.
+
 required:
   - compatible
   - reg
diff --git a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml
index e28ee9e46788..9a36bee750af 100644
--- a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml
@@ -7,7 +7,8 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Qualcomm Technologies, Inc. RPMh Regulators
 
 maintainers:
-  - David Collins <collinsd@codeaurora.org>
+  - Bjorn Andersson <bjorn.andersson@linaro.org>
+  - Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
 
 description: |
     rpmh-regulator devices support PMIC regulator management via the Voltage
@@ -78,7 +79,7 @@ properties:
         RPMh resource name suffix used for the regulators found
         on this PMIC.
     $ref: /schemas/types.yaml#/definitions/string
-    enum: [a, b, c, d, e, f]
+    enum: [a, b, c, d, e, f, h, k]
 
   qcom,always-wait-for-ack:
     description: |
@@ -94,35 +95,264 @@ properties:
   vdd-rgb-supply:
     description: Input supply phandle of rgb.
 
-  vin-lvs-1-2-supply:
-    description: Input supply phandle of one or more regulators.
-
-  vdd-bob-supply:
-    description: BOB regulator parent supply phandle.
-
   bob:
     type: object
     $ref: "regulator.yaml#"
     description: BOB regulator node.
 
 patternProperties:
-  "^vdd-s([0-9]+)-supply$":
-    description: Input supply phandle(s) of one or more regulators.
-
-  "^vdd-(l[0-9]+[-]){1,5}supply$":
-    description: Input supply phandle(s) of one or more regulators.
-
   "^(smps|ldo|lvs)[0-9]+$":
     type: object
     $ref: "regulator.yaml#"
     description: smps/ldo regulator nodes(s).
 
-additionalProperties: false
-
 required:
   - compatible
   - qcom,pmic-id
 
+allOf:
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,pm6150-rpmh-regulators
+    then:
+      properties:
+        vdd-l2-l3-supply: true
+        vdd-l4-l7-l8-supply: true
+        vdd-l5-l16-l17-l18-l19-supply: true
+        vdd-l10-l14-l15-supply: true
+        vdd-l11-l12-l13-supply: true
+      patternProperties:
+        "^vdd-l[169]-supply$": true
+        "^vdd-s[1-5]-supply$": true
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,pm6150l-rpmh-regulators
+    then:
+      properties:
+        vdd-bob-supply:
+          description: BOB regulator parent supply phandle.
+        vdd-l1-l8-supply: true
+        vdd-l2-l3-supply: true
+        vdd-l4-l5-l6-supply: true
+        vdd-l7-l11-supply: true
+        vdd-l9-l10-supply: true
+      patternProperties:
+        "^vdd-s[1-8]-supply$": true
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,pm7325-rpmh-regulators
+    then:
+      properties:
+        vdd-l1-l4-l12-l15-supply: true
+        vdd-l2-l7-supply: true
+        vdd-l6-l9-l10-supply: true
+        vdd-l11-l17-l18-l19-supply: true
+        vdd-l13-supply: true
+        vdd-l14-l16-supply: true
+      patternProperties:
+        "^vdd-l[358]-supply$": true
+        "^vdd-s[1-8]-supply$": true
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,pm8005-rpmh-regulators
+    then:
+      patternProperties:
+        "^vdd-s[1-4]-supply$": true
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,pm8009-rpmh-regulators
+            - qcom,pm8009-1-rpmh-regulators
+    then:
+      properties:
+        vdd-l5-l6-supply: true
+      patternProperties:
+        "^vdd-l[1-47]-supply$": true
+        "^vdd-s[1-2]-supply$": true
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,pm8150-rpmh-regulators
+            - qcom,pmm8155au-rpmh-regulators
+    then:
+      properties:
+        vdd-l1-l8-l11-supply: true
+        vdd-l2-l10-supply: true
+        vdd-l3-l4-l5-l18-supply: true
+        vdd-l6-l9-supply: true
+        vdd-l7-l12-l14-l15-supply: true
+        vdd-l13-l16-l17-supply: true
+      patternProperties:
+        "^vdd-s([1-9]|10)-supply$": true
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,pm8150l-rpmh-regulators
+    then:
+      properties:
+        vdd-bob-supply:
+          description: BOB regulator parent supply phandle.
+        vdd-l1-l8-supply: true
+        vdd-l2-l3-supply: true
+        vdd-l4-l5-l6-supply: true
+        vdd-l7-l11-supply: true
+        vdd-l9-l10-supply: true
+      patternProperties:
+        "^vdd-s[1-8]-supply$": true
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,pm8350-rpmh-regulators
+    then:
+      properties:
+        vdd-l1-l4-supply: true
+        vdd-l2-l7-supply: true
+        vdd-l3-l5-supply: true
+        vdd-l6-l9-l10-supply: true
+        vdd-l8-supply: true
+      patternProperties:
+        "^vdd-s([1-9]|1[0-2])-supply$": true
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,pm8350c-rpmh-regulators
+    then:
+      properties:
+        vdd-bob-supply:
+          description: BOB regulator parent supply phandle.
+        vdd-l1-l12-supply: true
+        vdd-l2-l8-supply: true
+        vdd-l3-l4-l5-l7-l13-supply: true
+        vdd-l6-l9-l11-supply: true
+        vdd-l10-supply: true
+      patternProperties:
+        "^vdd-s([1-9]|10)-supply$": true
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,pm8450-rpmh-regulators
+    then:
+      patternProperties:
+        "^vdd-l[1-4]-supply$": true
+        "^vdd-s[1-6]-supply$": true
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,pm8998-rpmh-regulators
+    then:
+      properties:
+        vdd-l1-l27-supply: true
+        vdd-l2-l8-l17-supply: true
+        vdd-l3-l11-supply: true
+        vdd-l4-l5-supply: true
+        vdd-l6-supply: true
+        vdd-l7-l12-l14-l15-supply: true
+        vdd-l9-supply: true
+        vdd-l10-l23-l25-supply: true
+        vdd-l13-l19-l21-supply: true
+        vdd-l16-l28-supply: true
+        vdd-l18-l22-supply: true
+        vdd-l20-l24-supply: true
+        vdd-l26-supply: true
+        vin-lvs-1-2-supply: true
+      patternProperties:
+        "^vdd-s([1-9]|1[0-3])-supply$": true
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,pmg1110-rpmh-regulators
+    then:
+      properties:
+        vdd-s1-supply: true
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,pmi8998-rpmh-regulators
+    then:
+      properties:
+        vdd-bob-supply:
+          description: BOB regulator parent supply phandle.
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,pmr735a-rpmh-regulators
+    then:
+      properties:
+        vdd-l1-l2-supply: true
+        vdd-l3-supply: true
+        vdd-l4-supply: true
+        vdd-l5-l6-supply: true
+        vdd-l7-bob-supply: true
+      patternProperties:
+        "^vdd-s[1-3]-supply$": true
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,pmx55-rpmh-regulators
+    then:
+      properties:
+        vdd-l1-l2-supply: true
+        vdd-l3-l9-supply: true
+        vdd-l4-l12-supply: true
+        vdd-l5-l6-supply: true
+        vdd-l7-l8-supply: true
+        vdd-l10-l11-l13-supply: true
+      patternProperties:
+        "^vdd-l1[4-6]-supply$": true
+        "^vdd-s[1-7]-supply$": true
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,pmx65-rpmh-regulators
+    then:
+      properties:
+        vdd-l2-l18-supply: true
+        vdd-l5-l6-l16-supply: true
+        vdd-l8-l9-supply: true
+        vdd-l11-l13-supply: true
+      patternProperties:
+        "^vdd-l[1347]-supply$": true
+        "^vdd-l1[0245789]-supply$": true
+        "^vdd-l2[01]-supply$": true
+        "^vdd-s[1-8]-supply$": true
+
+unevaluatedProperties: false
+
 examples:
   - |
     #include <dt-bindings/regulator/qcom,rpmh-regulator.h>
diff --git a/Documentation/devicetree/bindings/regulator/richtek,rt4801-regulator.yaml b/Documentation/devicetree/bindings/regulator/richtek,rt4801-regulator.yaml
index 235e593b3b2c..091150c4e579 100644
--- a/Documentation/devicetree/bindings/regulator/richtek,rt4801-regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/richtek,rt4801-regulator.yaml
@@ -17,9 +17,6 @@ description: |
   Datasheet is available at
   https://www.richtek.com/assets/product_file/RT4801H/DS4801H-00.pdf
 
-#The valid names for RT4801 regulator nodes are:
-#DSVP, DSVN
-
 properties:
   compatible:
     enum:
@@ -33,10 +30,13 @@ properties:
       The first one is ENP to enable DSVP, and second one is ENM to enable DSVN.
       Number of GPIO in the array list could be 1 or 2.
       If only one gpio is specified, only one gpio used to control ENP/ENM.
-      Else both are spefied, DSVP/DSVN could be controlled individually.
-      Othersie, this property not specified. treat both as always-on regulator.
+      Else if both are specified, DSVP/DSVN could be controlled individually.
+      If this property not specified, treat both as always-on regulators.
+
+      Property is deprecated. Use enable-gpios in each regulator.
     minItems: 1
     maxItems: 2
+    deprecated: true
 
 patternProperties:
   "^DSV(P|N)$":
@@ -45,6 +45,14 @@ patternProperties:
     description:
       Properties for single display bias regulator.
 
+    properties:
+      enable-gpios:
+        description:
+          GPIO to use to enable DSVP/DSVN regulator. One GPIO can be configured
+          for controlling both regulators.  If this property not specified for
+          any regulator, treat both as always-on regulators.
+        maxItems: 1
+
 required:
   - compatible
   - reg
@@ -60,19 +68,20 @@ examples:
         rt4801@73 {
             compatible = "richtek,rt4801";
             reg = <0x73>;
-            enable-gpios = <&gpio26 2 0>, <&gpio26 3 0>;
 
             dsvp: DSVP {
                 regulator-name = "rt4801,dsvp";
                 regulator-min-microvolt = <4000000>;
                 regulator-max-microvolt = <6000000>;
                 regulator-boot-on;
+                enable-gpios = <&gpio26 2 0>;
             };
             dsvn: DSVN {
                 regulator-name = "rt4801,dsvn";
                 regulator-min-microvolt = <4000000>;
                 regulator-max-microvolt = <6000000>;
                 regulator-boot-on;
+                enable-gpios = <&gpio26 3 0>;
             };
 
         };
diff --git a/Documentation/devicetree/bindings/regulator/richtek,rt5759-regulator.yaml b/Documentation/devicetree/bindings/regulator/richtek,rt5759-regulator.yaml
new file mode 100644
index 000000000000..0a4c9576a432
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/richtek,rt5759-regulator.yaml
@@ -0,0 +1,90 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/richtek,rt5759-regulator.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Richtek RT5759 High Performance DCDC Converter
+
+maintainers:
+  - ChiYuan Huang <cy_huang@richtek.com>
+
+description: |
+  The RT5759 is a high-performance, synchronous step-down DC-DC converter that
+  can deliver up to 9A output current from 3V to 6.5V input supply, The output
+  voltage can be programmable with I2C controlled 7-Bit VID.
+
+  Datasheet is available at
+  https://www.richtek.com/assets/product_file/RT5759/DS5759-00.pdf
+
+properties:
+  compatible:
+    enum:
+      - richtek,rt5759
+      - richtek,rt5759a
+
+  reg:
+    maxItems: 1
+
+  regulator-allowed-modes:
+    description: |
+      buck allowed operating mode
+        0: auto mode (PSKIP: pulse skipping)
+        1: force pwm mode
+    items:
+      enum: [0, 1]
+
+  richtek,watchdog-enable:
+    description: enable the external watchdog reset pin listening
+    type: boolean
+
+allOf:
+  - $ref: regulator.yaml#
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: richtek,rt5759
+    then:
+      properties:
+        richtek,watchdog-enable: false
+
+required:
+  - compatible
+  - reg
+
+unevaluatedProperties: false
+
+examples:
+  # example 1 for RT5759
+  - |
+    i2c {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      regulator@62 {
+        compatible = "richtek,rt5759";
+        reg = <0x62>;
+        regulator-name = "rt5759-buck";
+        regulator-min-microvolt = <600000>;
+        regulator-max-microvolt = <1500000>;
+        regulator-boot-on;
+      };
+    };
+  # example 2 for RT5759A
+  - |
+    i2c {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      regulator@62 {
+        compatible = "richtek,rt5759a";
+        reg = <0x62>;
+        regulator-name = "rt5759a-buck";
+        regulator-min-microvolt = <600000>;
+        regulator-max-microvolt = <1725000>;
+        regulator-boot-on;
+        richtek,watchdog-enable;
+      };
+    };
diff --git a/Documentation/devicetree/bindings/regulator/siliconmitus,sm5703-regulator.yaml b/Documentation/devicetree/bindings/regulator/siliconmitus,sm5703-regulator.yaml
new file mode 100644
index 000000000000..9d84117530ca
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/siliconmitus,sm5703-regulator.yaml
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/siliconmitus,sm5703-regulator.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Silicon Mitus SM5703 multi function device regulators
+
+maintainers:
+  - Markuss Broks <markuss.broks@gmail.com>
+
+description: |
+  SM5703 regulators node should be a sub node of the SM5703 MFD node. See SM5703 MFD
+  bindings at Documentation/devicetree/bindings/mfd/siliconmitus,sm5703.yaml
+  Regulator nodes should be named as USBLDO_<number>, BUCK, VBUS, LDO_<number>.
+  The definition for each of these nodes is defined using the standard
+  binding for regulators at Documentation/devicetree/bindings/regulator/regulator.txt.
+
+properties:
+  buck:
+    type: object
+    $ref: regulator.yaml#
+    unevaluatedProperties: false
+    description:
+      Properties for the BUCK regulator.
+
+  vbus:
+    type: object
+    $ref: regulator.yaml#
+    unevaluatedProperties: false
+    description:
+      Properties for the VBUS regulator.
+
+patternProperties:
+  "^ldo[1-3]$":
+    type: object
+    $ref: regulator.yaml#
+    unevaluatedProperties: false
+    description:
+      Properties for single LDO regulator.
+
+  "^usbldo[1-2]$":
+    type: object
+    $ref: regulator.yaml#
+    unevaluatedProperties: false
+    description:
+      Properties for a single USBLDO regulator.
+
+additionalProperties: false
diff --git a/Documentation/devicetree/bindings/regulator/socionext,uniphier-regulator.yaml b/Documentation/devicetree/bindings/regulator/socionext,uniphier-regulator.yaml
index 1218f21ba320..75087c6e001c 100644
--- a/Documentation/devicetree/bindings/regulator/socionext,uniphier-regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/socionext,uniphier-regulator.yaml
@@ -14,9 +14,6 @@ description: |
 maintainers:
   - Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
 
-allOf:
-  - $ref: "regulator.yaml#"
-
 # USB3 Controller
 
 properties:
@@ -36,27 +33,51 @@ properties:
     minItems: 1
     maxItems: 2
 
-  clock-names:
-    oneOf:
-      - items:          # for Pro4, Pro5
-          - const: gio
-          - const: link
-      - items:          # for others
-          - const: link
+  clock-names: true
 
   resets:
     minItems: 1
     maxItems: 2
 
-  reset-names:
-    oneOf:
-      - items:          # for Pro4, Pro5
-          - const: gio
-          - const: link
-      - items:
-          - const: link
+  reset-names: true
 
-additionalProperties: false
+allOf:
+  - $ref: "regulator.yaml#"
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - socionext,uniphier-pro4-usb3-regulator
+              - socionext,uniphier-pro5-usb3-regulator
+    then:
+      properties:
+        clocks:
+          minItems: 2
+          maxItems: 2
+        clock-names:
+          items:
+            - const: gio
+            - const: link
+        resets:
+          minItems: 2
+          maxItems: 2
+        reset-names:
+          items:
+            - const: gio
+            - const: link
+    else:
+      properties:
+        clocks:
+          maxItems: 1
+        clock-names:
+          const: link
+        resets:
+          maxItems: 1
+        reset-names:
+          const: link
+
+unevaluatedProperties: false
 
 required:
   - compatible
diff --git a/Documentation/devicetree/bindings/reserved-memory/phram.yaml b/Documentation/devicetree/bindings/reserved-memory/phram.yaml
new file mode 100644
index 000000000000..6c4db28015f1
--- /dev/null
+++ b/Documentation/devicetree/bindings/reserved-memory/phram.yaml
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/reserved-memory/phram.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MTD/block device in RAM
+
+description: |
+  Specifies that the reserved memory region can be used as an MTD or block
+  device.
+
+  The "phram" node is named after the "MTD in PHysical RAM" driver which
+  provides an implementation of this functionality in Linux.
+
+maintainers:
+  - Vincent Whitchurch <vincent.whitchurch@axis.com>
+
+allOf:
+  - $ref: "reserved-memory.yaml"
+  - $ref: "/schemas/mtd/mtd.yaml"
+
+properties:
+  compatible:
+    const: phram
+
+  reg:
+    description: region of memory that can be used as an MTD/block device
+
+required:
+  - compatible
+  - reg
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    reserved-memory {
+        #address-cells = <1>;
+        #size-cells = <1>;
+
+        phram: flash@12340000 {
+            compatible = "phram";
+            label = "rootfs";
+            reg = <0x12340000 0x00800000>;
+        };
+    };
diff --git a/Documentation/devicetree/bindings/spi/aspeed,ast2600-fmc.yaml b/Documentation/devicetree/bindings/spi/aspeed,ast2600-fmc.yaml
new file mode 100644
index 000000000000..fa8f4ac20985
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/aspeed,ast2600-fmc.yaml
@@ -0,0 +1,82 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/aspeed,ast2600-fmc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Aspeed SMC controllers bindings
+
+maintainers:
+  - Chin-Ting Kuo <chin-ting_kuo@aspeedtech.com>
+  - Cédric Le Goater <clg@kaod.org>
+
+description: |
+  This binding describes the Aspeed Static Memory Controllers (FMC and
+  SPI) of the AST2400, AST2500 and AST2600 SOCs.
+
+allOf:
+  - $ref: "spi-controller.yaml#"
+
+properties:
+  compatible:
+    enum:
+      - aspeed,ast2600-fmc
+      - aspeed,ast2600-spi
+      - aspeed,ast2500-fmc
+      - aspeed,ast2500-spi
+      - aspeed,ast2400-fmc
+      - aspeed,ast2400-spi
+
+  reg:
+    items:
+      - description: registers
+      - description: memory mapping
+
+  clocks:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - clocks
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/interrupt-controller/aspeed-scu-ic.h>
+    #include <dt-bindings/clock/ast2600-clock.h>
+
+    spi@1e620000 {
+        reg = <0x1e620000 0xc4>, <0x20000000 0x10000000>;
+        #address-cells = <1>;
+        #size-cells = <0>;
+        compatible = "aspeed,ast2600-fmc";
+        clocks = <&syscon ASPEED_CLK_AHB>;
+        interrupts = <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>;
+
+        flash@0 {
+                reg = < 0 >;
+                compatible = "jedec,spi-nor";
+                spi-max-frequency = <50000000>;
+                spi-rx-bus-width = <2>;
+        };
+
+        flash@1 {
+                reg = < 1 >;
+                compatible = "jedec,spi-nor";
+                spi-max-frequency = <50000000>;
+                spi-rx-bus-width = <2>;
+        };
+
+        flash@2 {
+                reg = < 2 >;
+                compatible = "jedec,spi-nor";
+                spi-max-frequency = <50000000>;
+                spi-rx-bus-width = <2>;
+        };
+    };
diff --git a/Documentation/devicetree/bindings/spi/ingenic,spi.yaml b/Documentation/devicetree/bindings/spi/ingenic,spi.yaml
index 5b1c7a2a6a31..360f76c226d9 100644
--- a/Documentation/devicetree/bindings/spi/ingenic,spi.yaml
+++ b/Documentation/devicetree/bindings/spi/ingenic,spi.yaml
@@ -18,7 +18,10 @@ properties:
     oneOf:
       - enum:
           - ingenic,jz4750-spi
+          - ingenic,jz4775-spi
           - ingenic,jz4780-spi
+          - ingenic,x1000-spi
+          - ingenic,x2000-spi
       - items:
           - enum:
               - ingenic,jz4760-spi
diff --git a/Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml b/Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml
index 818130b11bb9..94ef0552bd42 100644
--- a/Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml
+++ b/Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml
@@ -53,16 +53,20 @@ properties:
     maxItems: 1
 
   clocks:
+    minItems: 3
     items:
       - description: clock used for the parent clock
       - description: clock used for the muxes clock
       - description: clock used for the clock gate
+      - description: clock used for the AHB bus, this clock is optional
 
   clock-names:
+    minItems: 3
     items:
       - const: parent-clk
       - const: sel-clk
       - const: spi-clk
+      - const: hclk
 
   mediatek,pad-select:
     $ref: /schemas/types.yaml#/definitions/uint32-array
diff --git a/Documentation/devicetree/bindings/spi/mediatek,spi-mtk-snfi.yaml b/Documentation/devicetree/bindings/spi/mediatek,spi-mtk-snfi.yaml
new file mode 100644
index 000000000000..6e6e02c91780
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/mediatek,spi-mtk-snfi.yaml
@@ -0,0 +1,88 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/mediatek,spi-mtk-snfi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: SPI-NAND flash controller for MediaTek ARM SoCs
+
+maintainers:
+  - Chuanhong Guo <gch981213@gmail.com>
+
+description: |
+  The Mediatek SPI-NAND flash controller is an extended version of
+  the Mediatek NAND flash controller. It can perform standard SPI
+  instructions with one continuous write and one read for up-to 0xa0
+  bytes. It also supports typical SPI-NAND page cache operations
+  in single, dual or quad IO mode with pipelined ECC encoding/decoding
+  using the accompanying ECC engine. There should be only one spi
+  slave device following generic spi bindings.
+
+allOf:
+  - $ref: /schemas/spi/spi-controller.yaml#
+
+properties:
+  compatible:
+    enum:
+      - mediatek,mt7622-snand
+      - mediatek,mt7629-snand
+
+  reg:
+    items:
+      - description: core registers
+
+  interrupts:
+    items:
+      - description: NFI interrupt
+
+  clocks:
+    items:
+      - description: clock used for the controller
+      - description: clock used for the SPI bus
+
+  clock-names:
+    items:
+      - const: nfi_clk
+      - const: pad_clk
+
+  nand-ecc-engine:
+    description: device-tree node of the accompanying ECC engine.
+    $ref: /schemas/types.yaml#/definitions/phandle
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - nand-ecc-engine
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/mt7622-clk.h>
+    soc {
+      #address-cells = <2>;
+      #size-cells = <2>;
+      snfi: spi@1100d000 {
+        compatible = "mediatek,mt7622-snand";
+        reg = <0 0x1100d000 0 0x1000>;
+        interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_LOW>;
+        clocks = <&pericfg CLK_PERI_NFI_PD>, <&pericfg CLK_PERI_SNFI_PD>;
+        clock-names = "nfi_clk", "pad_clk";
+        nand-ecc-engine = <&bch>;
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        flash@0 {
+          compatible = "spi-nand";
+          reg = <0>;
+          spi-tx-bus-width = <4>;
+          spi-rx-bus-width = <4>;
+          nand-ecc-engine = <&snfi>;
+        };
+      };
+    };
diff --git a/Documentation/devicetree/bindings/spi/qcom,spi-qcom-qspi.yaml b/Documentation/devicetree/bindings/spi/qcom,spi-qcom-qspi.yaml
index 5a60fba14bba..44d08aa3fd85 100644
--- a/Documentation/devicetree/bindings/spi/qcom,spi-qcom-qspi.yaml
+++ b/Documentation/devicetree/bindings/spi/qcom,spi-qcom-qspi.yaml
@@ -49,6 +49,7 @@ properties:
     maxItems: 2
 
   interconnect-names:
+    minItems: 1
     items:
       - const: qspi-config
       - const: qspi-memory
diff --git a/Documentation/devicetree/bindings/spi/renesas,rspi.yaml b/Documentation/devicetree/bindings/spi/renesas,rspi.yaml
index 2c3c6bd6ec45..f45d3b75d6de 100644
--- a/Documentation/devicetree/bindings/spi/renesas,rspi.yaml
+++ b/Documentation/devicetree/bindings/spi/renesas,rspi.yaml
@@ -21,6 +21,7 @@ properties:
           - enum:
               - renesas,rspi-r7s72100  # RZ/A1H
               - renesas,rspi-r7s9210   # RZ/A2
+              - renesas,r9a07g043-rspi # RZ/G2UL
               - renesas,r9a07g044-rspi # RZ/G2{L,LC}
               - renesas,r9a07g054-rspi # RZ/V2L
           - const: renesas,rspi-rz
@@ -124,6 +125,7 @@ allOf:
           contains:
             enum:
               - renesas,qspi
+              - renesas,r9a07g043-rspi
               - renesas,r9a07g044-rspi
               - renesas,r9a07g054-rspi
     then:
diff --git a/Documentation/devicetree/bindings/thermal/qcom-lmh.yaml b/Documentation/devicetree/bindings/thermal/qcom-lmh.yaml
index a9b7388ca9ac..e1587ddf7de3 100644
--- a/Documentation/devicetree/bindings/thermal/qcom-lmh.yaml
+++ b/Documentation/devicetree/bindings/thermal/qcom-lmh.yaml
@@ -18,6 +18,7 @@ description:
 properties:
   compatible:
     enum:
+      - qcom,sc8180x-lmh
       - qcom,sdm845-lmh
       - qcom,sm8150-lmh
 
diff --git a/Documentation/devicetree/bindings/thermal/qcom-spmi-adc-tm5.yaml b/Documentation/devicetree/bindings/thermal/qcom-spmi-adc-tm5.yaml
index 3ea8c0c1f45f..feb390d50696 100644
--- a/Documentation/devicetree/bindings/thermal/qcom-spmi-adc-tm5.yaml
+++ b/Documentation/devicetree/bindings/thermal/qcom-spmi-adc-tm5.yaml
@@ -10,7 +10,9 @@ maintainers:
 
 properties:
   compatible:
-    const: qcom,spmi-adc-tm5
+    enum:
+      - qcom,spmi-adc-tm5
+      - qcom,spmi-adc-tm5-gen2
 
   reg:
     maxItems: 1
@@ -33,6 +35,7 @@ properties:
   qcom,avg-samples:
     $ref: /schemas/types.yaml#/definitions/uint32
     description: Number of samples to be used for measurement.
+            Not applicable for Gen2 ADC_TM peripheral.
     enum:
       - 1
       - 2
@@ -45,6 +48,7 @@ properties:
     $ref: /schemas/types.yaml#/definitions/uint32
     description: This parameter is used to decrease ADC sampling rate.
             Quicker measurements can be made by reducing decimation ratio.
+            Not applicable for Gen2 ADC_TM peripheral.
     enum:
       - 250
       - 420
@@ -93,6 +97,29 @@ patternProperties:
           - const: 1
           - enum: [ 1, 3, 4, 6, 20, 8, 10 ]
 
+      qcom,avg-samples:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        description: Number of samples to be used for measurement.
+          This property in child node is applicable only for Gen2 ADC_TM peripheral.
+        enum:
+          - 1
+          - 2
+          - 4
+          - 8
+          - 16
+        default: 1
+
+      qcom,decimation:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        description: This parameter is used to decrease ADC sampling rate.
+          Quicker measurements can be made by reducing decimation ratio.
+          This property in child node is applicable only for Gen2 ADC_TM peripheral.
+        enum:
+          - 85
+          - 340
+          - 1360
+        default: 1360
+
     required:
       - reg
       - io-channels
@@ -100,6 +127,31 @@ patternProperties:
     additionalProperties:
       false
 
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: qcom,spmi-adc-tm5
+
+    then:
+      patternProperties:
+        "^([-a-z0-9]*)@[0-7]$":
+          properties:
+            qcom,decimation: false
+            qcom,avg-samples: false
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: qcom,spmi-adc-tm5-gen2
+
+    then:
+      properties:
+        qcom,avg-samples: false
+        qcom,decimation: false
+
 required:
   - compatible
   - reg
@@ -124,7 +176,7 @@ examples:
             #size-cells = <0>;
             #io-channel-cells = <1>;
 
-            /* Other propreties are omitted */
+            /* Other properties are omitted */
             conn-therm@4f {
                 reg = <ADC5_AMUX_THM3_100K_PU>;
                 qcom,ratiometric;
@@ -148,4 +200,58 @@ examples:
             };
         };
     };
+
+  - |
+    #include <dt-bindings/iio/qcom,spmi-adc7-pmk8350.h>
+    #include <dt-bindings/iio/qcom,spmi-adc7-pm8350.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+    spmi_bus {
+        #address-cells = <1>;
+        #size-cells = <0>;
+        pmk8350_vadc: adc@3100 {
+            reg = <0x3100>;
+            compatible = "qcom,spmi-adc7";
+            #address-cells = <1>;
+            #size-cells = <0>;
+            #io-channel-cells = <1>;
+
+            /* Other properties are omitted */
+            xo-therm@44 {
+                reg = <PMK8350_ADC7_AMUX_THM1_100K_PU>;
+                qcom,ratiometric;
+                qcom,hw-settle-time = <200>;
+            };
+
+            conn-therm@47 {
+                reg = <PM8350_ADC7_AMUX_THM4_100K_PU>;
+                qcom,ratiometric;
+                qcom,hw-settle-time = <200>;
+            };
+        };
+
+        pmk8350_adc_tm: adc-tm@3400 {
+            compatible = "qcom,spmi-adc-tm5-gen2";
+            reg = <0x3400>;
+            interrupts = <0x0 0x34 0x0 IRQ_TYPE_EDGE_RISING>;
+            #thermal-sensor-cells = <1>;
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            pmk8350-xo-therm@0 {
+                reg = <0>;
+                io-channels = <&pmk8350_vadc PMK8350_ADC7_AMUX_THM1_100K_PU>;
+                qcom,decimation = <340>;
+                qcom,ratiometric;
+                qcom,hw-settle-time-us = <200>;
+            };
+
+            conn-therm@1 {
+                reg = <1>;
+                io-channels = <&pmk8350_vadc PM8350_ADC7_AMUX_THM4_100K_PU>;
+                qcom,avg-samples = <2>;
+                qcom,ratiometric;
+                qcom,hw-settle-time-us = <200>;
+            };
+        };
+    };
 ...
diff --git a/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
index b6406bcc683f..a24baf9b9f64 100644
--- a/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
+++ b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
@@ -19,10 +19,11 @@ description: |
 properties:
   compatible:
     oneOf:
-      - description: msm9860 TSENS based
+      - description: msm8960 TSENS based
         items:
           - enum:
               - qcom,ipq8064-tsens
+              - qcom,msm8960-tsens
 
       - description: v0.1 of TSENS
         items:
@@ -49,6 +50,7 @@ properties:
               - qcom,sc7180-tsens
               - qcom,sc7280-tsens
               - qcom,sc8180x-tsens
+              - qcom,sc8280xp-tsens
               - qcom,sdm630-tsens
               - qcom,sdm845-tsens
               - qcom,sm8150-tsens
@@ -116,6 +118,7 @@ allOf:
               - qcom,ipq8064-tsens
               - qcom,mdm9607-tsens
               - qcom,msm8916-tsens
+              - qcom,msm8960-tsens
               - qcom,msm8974-tsens
               - qcom,msm8976-tsens
               - qcom,qcs404-tsens
diff --git a/Documentation/devicetree/bindings/thermal/rzg2l-thermal.yaml b/Documentation/devicetree/bindings/thermal/rzg2l-thermal.yaml
index ccab9511a042..1d8373397848 100644
--- a/Documentation/devicetree/bindings/thermal/rzg2l-thermal.yaml
+++ b/Documentation/devicetree/bindings/thermal/rzg2l-thermal.yaml
@@ -17,7 +17,9 @@ properties:
   compatible:
     items:
       - enum:
+          - renesas,r9a07g043-tsu # RZ/G2UL
           - renesas,r9a07g044-tsu # RZ/G2{L,LC}
+          - renesas,r9a07g054-tsu # RZ/V2L
       - const: renesas,rzg2l-tsu
 
   reg:
diff --git a/Documentation/devicetree/bindings/thermal/ti,j72xx-thermal.yaml b/Documentation/devicetree/bindings/thermal/ti,j72xx-thermal.yaml
new file mode 100644
index 000000000000..c74f124ebfc0
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/ti,j72xx-thermal.yaml
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/thermal/ti,j72xx-thermal.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Texas Instruments J72XX VTM (DTS) binding
+
+maintainers:
+  - Keerthy <j-keerthy@ti.com>
+
+properties:
+  compatible:
+    enum:
+      - ti,j721e-vtm
+      - ti,j7200-vtm
+
+  reg:
+    items:
+      - description: VTM cfg1 register space
+      - description: VTM cfg2 register space
+      - description: VTM efuse register space
+
+  power-domains:
+    maxItems: 1
+
+  "#thermal-sensor-cells":
+    const: 1
+
+required:
+  - compatible
+  - reg
+  - power-domains
+  - "#thermal-sensor-cells"
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/soc/ti,sci_pm_domain.h>
+    wkup_vtm0: thermal-sensor@42040000 {
+        compatible = "ti,j721e-vtm";
+        reg = <0x42040000 0x350>,
+              <0x42050000 0x350>,
+              <0x43000300 0x10>;
+        power-domains = <&k3_pds 154 TI_SCI_PD_EXCLUSIVE>;
+        #thermal-sensor-cells = <1>;
+    };
+
+    mpu_thermal: mpu-thermal {
+        polling-delay-passive = <250>; /* milliseconds */
+        polling-delay = <500>; /* milliseconds */
+        thermal-sensors = <&wkup_vtm0 0>;
+
+        trips {
+            mpu_crit: mpu-crit {
+                temperature = <125000>; /* milliCelsius */
+                hysteresis = <2000>; /* milliCelsius */
+                type = "critical";
+            };
+        };
+    };
+...
diff --git a/Documentation/devicetree/bindings/trivial-devices.yaml b/Documentation/devicetree/bindings/trivial-devices.yaml
index 550a2e5c9e05..c11520347a9d 100644
--- a/Documentation/devicetree/bindings/trivial-devices.yaml
+++ b/Documentation/devicetree/bindings/trivial-devices.yaml
@@ -143,6 +143,10 @@ properties:
           - infineon,xdpe12254
             # Infineon Multi-phase Digital VR Controller xdpe12284
           - infineon,xdpe12284
+            # Infineon Multi-phase Digital VR Controller xdpe15284
+          - infineon,xdpe15284
+            # Infineon Multi-phase Digital VR Controller xdpe152c4
+          - infineon,xdpe152c4
             # Injoinic IP5108 2.0A Power Bank IC with I2C
           - injoinic,ip5108
             # Injoinic IP5109 2.1A Power Bank IC with I2C
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index e12a75e10456..2bf2b3accc8e 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -1084,6 +1084,8 @@ patternProperties:
     description: Sensirion AG
   "^sensortek,.*":
     description: Sensortek Technology Corporation
+  "^sercomm,.*":
+    description: Sercomm (Suzhou) Corporation
   "^sff,.*":
     description: Small Form Factor Committee
   "^sgd,.*":
diff --git a/Documentation/doc-guide/contributing.rst b/Documentation/doc-guide/contributing.rst
index 207fd93d7c80..d4793826ad9a 100644
--- a/Documentation/doc-guide/contributing.rst
+++ b/Documentation/doc-guide/contributing.rst
@@ -79,8 +79,9 @@ simplistic idea of what C comment blocks look like.  This problem had been
 present since that comment was added in 2016 — a full four years.  Fixing
 it was a matter of adding the missing asterisks.  A quick look at the
 history for that file showed what the normal format for subject lines is,
-and ``scripts/get_maintainer.pl`` told me who should receive it.  The
-resulting patch looked like this::
+and ``scripts/get_maintainer.pl`` told me who should receive it (pass paths to
+your patches as arguments to scripts/get_maintainer.pl).  The resulting patch
+looked like this::
 
   [PATCH] PM / devfreq: Fix two malformed kerneldoc comments
 
diff --git a/Documentation/doc-guide/kernel-doc.rst b/Documentation/doc-guide/kernel-doc.rst
index 79aaa55d6bcf..a7cb2afd7990 100644
--- a/Documentation/doc-guide/kernel-doc.rst
+++ b/Documentation/doc-guide/kernel-doc.rst
@@ -1,3 +1,4 @@
+===========================
 Writing kernel-doc comments
 ===========================
 
@@ -436,6 +437,7 @@ The title following ``DOC:`` acts as a heading within the source file, but also
 as an identifier for extracting the documentation comment. Thus, the title must
 be unique within the file.
 
+=============================
 Including kernel-doc comments
 =============================
 
diff --git a/Documentation/doc-guide/sphinx.rst b/Documentation/doc-guide/sphinx.rst
index bb36f18ae9ac..2ff1ab4158d4 100644
--- a/Documentation/doc-guide/sphinx.rst
+++ b/Documentation/doc-guide/sphinx.rst
@@ -1,7 +1,8 @@
 .. _sphinxdoc:
 
-Introduction
-============
+=====================================
+Using Sphinx for kernel documentation
+=====================================
 
 The Linux kernel uses `Sphinx`_ to generate pretty documentation from
 `reStructuredText`_ files under ``Documentation``. To build the documentation in
diff --git a/Documentation/dontdiff b/Documentation/dontdiff
index 910b30a2a7d9..352ff53a2306 100644
--- a/Documentation/dontdiff
+++ b/Documentation/dontdiff
@@ -211,6 +211,7 @@ r200_reg_safe.h
 r300_reg_safe.h
 r420_reg_safe.h
 r600_reg_safe.h
+randstruct.seed
 randomize_layout_hash.h
 randomize_layout_seed.h
 recordmcount
diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst
index 5018403fe82f..2d39967bafcc 100644
--- a/Documentation/driver-api/driver-model/devres.rst
+++ b/Documentation/driver-api/driver-model/devres.rst
@@ -249,7 +249,7 @@ CLOCK
   devm_clk_bulk_get()
   devm_clk_bulk_get_all()
   devm_clk_bulk_get_optional()
-  devm_get_clk_from_childl()
+  devm_get_clk_from_child()
   devm_clk_hw_register()
   devm_of_clk_add_hw_provider()
   devm_clk_hw_register_clkdev()
diff --git a/Documentation/driver-api/gpio/driver.rst b/Documentation/driver-api/gpio/driver.rst
index bbc53920d4dd..a1ddefa1f55f 100644
--- a/Documentation/driver-api/gpio/driver.rst
+++ b/Documentation/driver-api/gpio/driver.rst
@@ -417,30 +417,66 @@ struct gpio_irq_chip inside struct gpio_chip before adding the gpio_chip.
 If you do this, the additional irq_chip will be set up by gpiolib at the
 same time as setting up the rest of the GPIO functionality. The following
 is a typical example of a chained cascaded interrupt handler using
-the gpio_irq_chip:
+the gpio_irq_chip. Note how the mask/unmask (or disable/enable) functions
+call into the core gpiolib code:
 
 .. code-block:: c
 
-  /* Typical state container with dynamic irqchip */
+  /* Typical state container */
   struct my_gpio {
       struct gpio_chip gc;
-      struct irq_chip irq;
+  };
+
+  static void my_gpio_mask_irq(struct irq_data *d)
+  {
+      struct gpio_chip *gc = irq_desc_get_handler_data(d);
+
+      /*
+       * Perform any necessary action to mask the interrupt,
+       * and then call into the core code to synchronise the
+       * state.
+       */
+
+      gpiochip_disable_irq(gc, d->hwirq);
+  }
+
+  static void my_gpio_unmask_irq(struct irq_data *d)
+  {
+      struct gpio_chip *gc = irq_desc_get_handler_data(d);
+
+      gpiochip_enable_irq(gc, d->hwirq);
+
+      /*
+       * Perform any necessary action to unmask the interrupt,
+       * after having called into the core code to synchronise
+       * the state.
+       */
+  }
+
+  /*
+   * Statically populate the irqchip. Note that it is made const
+   * (further indicated by the IRQCHIP_IMMUTABLE flag), and that
+   * the GPIOCHIP_IRQ_RESOURCE_HELPER macro adds some extra
+   * callbacks to the structure.
+   */
+  static const struct irq_chip my_gpio_irq_chip = {
+      .name		= "my_gpio_irq",
+      .irq_ack		= my_gpio_ack_irq,
+      .irq_mask		= my_gpio_mask_irq,
+      .irq_unmask	= my_gpio_unmask_irq,
+      .irq_set_type	= my_gpio_set_irq_type,
+      .flags		= IRQCHIP_IMMUTABLE,
+      /* Provide the gpio resource callbacks */
+      GPIOCHIP_IRQ_RESOURCE_HELPERS,
   };
 
   int irq; /* from platform etc */
   struct my_gpio *g;
   struct gpio_irq_chip *girq;
 
-  /* Set up the irqchip dynamically */
-  g->irq.name = "my_gpio_irq";
-  g->irq.irq_ack = my_gpio_ack_irq;
-  g->irq.irq_mask = my_gpio_mask_irq;
-  g->irq.irq_unmask = my_gpio_unmask_irq;
-  g->irq.irq_set_type = my_gpio_set_irq_type;
-
   /* Get a pointer to the gpio_irq_chip */
   girq = &g->gc.irq;
-  girq->chip = &g->irq;
+  gpio_irq_chip_set_chip(girq, &my_gpio_irq_chip);
   girq->parent_handler = ftgpio_gpio_irq_handler;
   girq->num_parents = 1;
   girq->parents = devm_kcalloc(dev, 1, sizeof(*girq->parents),
@@ -458,23 +494,58 @@ the interrupt separately and go with it:
 
 .. code-block:: c
 
-  /* Typical state container with dynamic irqchip */
+  /* Typical state container */
   struct my_gpio {
       struct gpio_chip gc;
-      struct irq_chip irq;
+  };
+
+  static void my_gpio_mask_irq(struct irq_data *d)
+  {
+      struct gpio_chip *gc = irq_desc_get_handler_data(d);
+
+      /*
+       * Perform any necessary action to mask the interrupt,
+       * and then call into the core code to synchronise the
+       * state.
+       */
+
+      gpiochip_disable_irq(gc, d->hwirq);
+  }
+
+  static void my_gpio_unmask_irq(struct irq_data *d)
+  {
+      struct gpio_chip *gc = irq_desc_get_handler_data(d);
+
+      gpiochip_enable_irq(gc, d->hwirq);
+
+      /*
+       * Perform any necessary action to unmask the interrupt,
+       * after having called into the core code to synchronise
+       * the state.
+       */
+  }
+
+  /*
+   * Statically populate the irqchip. Note that it is made const
+   * (further indicated by the IRQCHIP_IMMUTABLE flag), and that
+   * the GPIOCHIP_IRQ_RESOURCE_HELPER macro adds some extra
+   * callbacks to the structure.
+   */
+  static const struct irq_chip my_gpio_irq_chip = {
+      .name		= "my_gpio_irq",
+      .irq_ack		= my_gpio_ack_irq,
+      .irq_mask		= my_gpio_mask_irq,
+      .irq_unmask	= my_gpio_unmask_irq,
+      .irq_set_type	= my_gpio_set_irq_type,
+      .flags		= IRQCHIP_IMMUTABLE,
+      /* Provide the gpio resource callbacks */
+      GPIOCHIP_IRQ_RESOURCE_HELPERS,
   };
 
   int irq; /* from platform etc */
   struct my_gpio *g;
   struct gpio_irq_chip *girq;
 
-  /* Set up the irqchip dynamically */
-  g->irq.name = "my_gpio_irq";
-  g->irq.irq_ack = my_gpio_ack_irq;
-  g->irq.irq_mask = my_gpio_mask_irq;
-  g->irq.irq_unmask = my_gpio_unmask_irq;
-  g->irq.irq_set_type = my_gpio_set_irq_type;
-
   ret = devm_request_threaded_irq(dev, irq, NULL,
 		irq_thread_fn, IRQF_ONESHOT, "my-chip", g);
   if (ret < 0)
@@ -482,7 +553,7 @@ the interrupt separately and go with it:
 
   /* Get a pointer to the gpio_irq_chip */
   girq = &g->gc.irq;
-  girq->chip = &g->irq;
+  gpio_irq_chip_set_chip(girq, &my_gpio_irq_chip);
   /* This will let us handle the parent IRQ in the driver */
   girq->parent_handler = NULL;
   girq->num_parents = 0;
@@ -500,24 +571,61 @@ In this case the typical set-up will look like this:
   /* Typical state container with dynamic irqchip */
   struct my_gpio {
       struct gpio_chip gc;
-      struct irq_chip irq;
       struct fwnode_handle *fwnode;
   };
 
-  int irq; /* from platform etc */
+  static void my_gpio_mask_irq(struct irq_data *d)
+  {
+      struct gpio_chip *gc = irq_desc_get_handler_data(d);
+
+      /*
+       * Perform any necessary action to mask the interrupt,
+       * and then call into the core code to synchronise the
+       * state.
+       */
+
+      gpiochip_disable_irq(gc, d->hwirq);
+      irq_mask_mask_parent(d);
+  }
+
+  static void my_gpio_unmask_irq(struct irq_data *d)
+  {
+      struct gpio_chip *gc = irq_desc_get_handler_data(d);
+
+      gpiochip_enable_irq(gc, d->hwirq);
+
+      /*
+       * Perform any necessary action to unmask the interrupt,
+       * after having called into the core code to synchronise
+       * the state.
+       */
+
+      irq_mask_unmask_parent(d);
+  }
+
+  /*
+   * Statically populate the irqchip. Note that it is made const
+   * (further indicated by the IRQCHIP_IMMUTABLE flag), and that
+   * the GPIOCHIP_IRQ_RESOURCE_HELPER macro adds some extra
+   * callbacks to the structure.
+   */
+  static const struct irq_chip my_gpio_irq_chip = {
+      .name		= "my_gpio_irq",
+      .irq_ack		= my_gpio_ack_irq,
+      .irq_mask		= my_gpio_mask_irq,
+      .irq_unmask	= my_gpio_unmask_irq,
+      .irq_set_type	= my_gpio_set_irq_type,
+      .flags		= IRQCHIP_IMMUTABLE,
+      /* Provide the gpio resource callbacks */
+      GPIOCHIP_IRQ_RESOURCE_HELPERS,
+  };
+
   struct my_gpio *g;
   struct gpio_irq_chip *girq;
 
-  /* Set up the irqchip dynamically */
-  g->irq.name = "my_gpio_irq";
-  g->irq.irq_ack = my_gpio_ack_irq;
-  g->irq.irq_mask = my_gpio_mask_irq;
-  g->irq.irq_unmask = my_gpio_unmask_irq;
-  g->irq.irq_set_type = my_gpio_set_irq_type;
-
   /* Get a pointer to the gpio_irq_chip */
   girq = &g->gc.irq;
-  girq->chip = &g->irq;
+  gpio_irq_chip_set_chip(girq, &my_gpio_irq_chip);
   girq->default_type = IRQ_TYPE_NONE;
   girq->handler = handle_bad_irq;
   girq->fwnode = g->fwnode;
@@ -605,8 +713,9 @@ When implementing an irqchip inside a GPIO driver, these two functions should
 typically be called in the .irq_disable() and .irq_enable() callbacks from the
 irqchip.
 
-When using the gpiolib irqchip helpers, these callbacks are automatically
-assigned.
+When IRQCHIP_IMMUTABLE is not advertised by the irqchip, these callbacks
+are automatically assigned. This behaviour is deprecated and on its way
+to be removed from the kernel.
 
 
 Real-Time compliance for GPIO IRQ chips
diff --git a/Documentation/driver-api/libata.rst b/Documentation/driver-api/libata.rst
index d477e296bda5..311af516a3fd 100644
--- a/Documentation/driver-api/libata.rst
+++ b/Documentation/driver-api/libata.rst
@@ -424,12 +424,6 @@ How commands are issued
 -----------------------
 
 Internal commands
-    First, qc is allocated and initialized using :c:func:`ata_qc_new_init`.
-    Although :c:func:`ata_qc_new_init` doesn't implement any wait or retry
-    mechanism when qc is not available, internal commands are currently
-    issued only during initialization and error recovery, so no other
-    command is active and allocation is guaranteed to succeed.
-
     Once allocated qc's taskfile is initialized for the command to be
     executed. qc currently has two mechanisms to notify completion. One
     is via ``qc->complete_fn()`` callback and the other is completion
@@ -447,11 +441,6 @@ SCSI commands
     translated. No qc is involved in processing a simulated scmd. The
     result is computed right away and the scmd is completed.
 
-    For a translated scmd, :c:func:`ata_qc_new_init` is invoked to allocate a
-    qc and the scmd is translated into the qc. SCSI midlayer's
-    completion notification function pointer is stored into
-    ``qc->scsidone``.
-
     ``qc->complete_fn()`` callback is used for completion notification. ATA
     commands use :c:func:`ata_scsi_qc_complete` while ATAPI commands use
     :c:func:`atapi_qc_complete`. Both functions end up calling ``qc->scsidone``
diff --git a/Documentation/driver-api/media/cec-core.rst b/Documentation/driver-api/media/cec-core.rst
index c6194ee81c41..ae0d20798edc 100644
--- a/Documentation/driver-api/media/cec-core.rst
+++ b/Documentation/driver-api/media/cec-core.rst
@@ -109,6 +109,7 @@ your driver:
 		int (*adap_monitor_all_enable)(struct cec_adapter *adap, bool enable);
 		int (*adap_monitor_pin_enable)(struct cec_adapter *adap, bool enable);
 		int (*adap_log_addr)(struct cec_adapter *adap, u8 logical_addr);
+		void (*adap_configured)(struct cec_adapter *adap, bool configured);
 		int (*adap_transmit)(struct cec_adapter *adap, u8 attempts,
 				      u32 signal_free_time, struct cec_msg *msg);
 		void (*adap_status)(struct cec_adapter *adap, struct seq_file *file);
@@ -117,7 +118,7 @@ your driver:
 		/* Error injection callbacks */
 		...
 
-		/* High-level callbacks */
+		/* High-level callback */
 		...
 	};
 
@@ -178,6 +179,16 @@ can receive directed messages to that address.
 Note that adap_log_addr must return 0 if logical_addr is CEC_LOG_ADDR_INVALID.
 
 
+Called when the adapter is fully configured or unconfigured::
+
+	void (*adap_configured)(struct cec_adapter *adap, bool configured);
+
+If configured == true, then the adapter is fully configured, i.e. all logical
+addresses have been successfully claimed. If configured == false, then the
+adapter is unconfigured. If the driver has to take specific actions after
+(un)configuration, then that can be done through this optional callback.
+
+
 To transmit a new message::
 
 	int (*adap_transmit)(struct cec_adapter *adap, u8 attempts,
diff --git a/Documentation/driver-api/media/mc-core.rst b/Documentation/driver-api/media/mc-core.rst
index 57b5bbba944e..02481a2513b9 100644
--- a/Documentation/driver-api/media/mc-core.rst
+++ b/Documentation/driver-api/media/mc-core.rst
@@ -42,9 +42,16 @@ Allocation of the structure is handled by the media device driver, usually by
 embedding the :c:type:`media_device` instance in a larger driver-specific
 structure.
 
-Drivers register media device instances by calling
-:c:func:`__media_device_register()` via the macro ``media_device_register()``
-and unregistered by calling :c:func:`media_device_unregister()`.
+Drivers initialise media device instances by calling
+:c:func:`media_device_init()`. After initialising a media device instance, it is
+registered by calling :c:func:`__media_device_register()` via the macro
+``media_device_register()`` and unregistered by calling
+:c:func:`media_device_unregister()`. An initialised media device must be
+eventually cleaned up by calling :c:func:`media_device_cleanup()`.
+
+Note that it is not allowed to unregister a media device instance that was not
+previously registered, or clean up a media device instance that was not
+previously initialised.
 
 Entities
 ^^^^^^^^
diff --git a/Documentation/driver-api/media/v4l2-subdev.rst b/Documentation/driver-api/media/v4l2-subdev.rst
index 08ea2673b19e..cf3b52bdbfb9 100644
--- a/Documentation/driver-api/media/v4l2-subdev.rst
+++ b/Documentation/driver-api/media/v4l2-subdev.rst
@@ -518,6 +518,75 @@ The :c:func:`v4l2_i2c_new_subdev` function will call
 :c:type:`i2c_board_info` structure using the ``client_type`` and the
 ``addr`` to fill it.
 
+Centrally managed subdev active state
+-------------------------------------
+
+Traditionally V4L2 subdev drivers maintained internal state for the active
+device configuration. This is often implemented as e.g. an array of struct
+v4l2_mbus_framefmt, one entry for each pad, and similarly for crop and compose
+rectangles.
+
+In addition to the active configuration, each subdev file handle has an array of
+struct v4l2_subdev_pad_config, managed by the V4L2 core, which contains the try
+configuration.
+
+To simplify the subdev drivers the V4L2 subdev API now optionally supports a
+centrally managed active configuration represented by
+:c:type:`v4l2_subdev_state`. One instance of state, which contains the active
+device configuration, is stored in the sub-device itself as part of
+the :c:type:`v4l2_subdev` structure, while the core associates a try state to
+each open file handle, to store the try configuration related to that file
+handle.
+
+Sub-device drivers can opt-in and use state to manage their active configuration
+by initializing the subdevice state with a call to v4l2_subdev_init_finalize()
+before registering the sub-device. They must also call v4l2_subdev_cleanup()
+to release all the allocated resources before unregistering the sub-device.
+The core automatically allocates and initializes a state for each open file
+handle to store the try configurations and frees it when closing the file
+handle.
+
+V4L2 sub-device operations that use both the :ref:`ACTIVE and TRY formats
+<v4l2-subdev-format-whence>` receive the correct state to operate on through
+the 'state' parameter. The state must be locked and unlocked by the
+caller by calling :c:func:`v4l2_subdev_lock_state()` and
+:c:func:`v4l2_subdev_unlock_state()`. The caller can do so by calling the subdev
+operation through the :c:func:`v4l2_subdev_call_state_active()` macro.
+
+Operations that do not receive a state parameter implicitly operate on the
+subdevice active state, which drivers can exclusively access by
+calling :c:func:`v4l2_subdev_lock_and_get_active_state()`. The sub-device active
+state must equally be released by calling :c:func:`v4l2_subdev_unlock_state()`.
+
+Drivers must never manually access the state stored in the :c:type:`v4l2_subdev`
+or in the file handle without going through the designated helpers.
+
+While the V4L2 core passes the correct try or active state to the subdevice
+operations, many existing device drivers pass a NULL state when calling
+operations with :c:func:`v4l2_subdev_call()`. This legacy construct causes
+issues with subdevice drivers that let the V4L2 core manage the active state,
+as they expect to receive the appropriate state as a parameter. To help the
+conversion of subdevice drivers to a managed active state without having to
+convert all callers at the same time, an additional wrapper layer has been
+added to v4l2_subdev_call(), which handles the NULL case by geting and locking
+the callee's active state with :c:func:`v4l2_subdev_lock_and_get_active_state()`,
+and unlocking the state after the call.
+
+The whole subdev state is in reality split into three parts: the
+v4l2_subdev_state, subdev controls and subdev driver's internal state. In the
+future these parts should be combined into a single state. For the time being
+we need a way to handle the locking for these parts. This can be accomplished
+by sharing a lock. The v4l2_ctrl_handler already supports this via its 'lock'
+pointer and the same model is used with states. The driver can do the following
+before calling v4l2_subdev_init_finalize():
+
+.. code-block:: c
+
+	sd->ctrl_handler->lock = &priv->mutex;
+	sd->state_lock = &priv->mutex;
+
+This shares the driver's private mutex between the controls and the states.
+
 V4L2 sub-device functions and data structures
 ---------------------------------------------
 
diff --git a/Documentation/driver-api/thermal/intel_dptf.rst b/Documentation/driver-api/thermal/intel_dptf.rst
index 96668dca753a..372bdb4d04c6 100644
--- a/Documentation/driver-api/thermal/intel_dptf.rst
+++ b/Documentation/driver-api/thermal/intel_dptf.rst
@@ -4,7 +4,7 @@
 Intel(R) Dynamic Platform and Thermal Framework Sysfs Interface
 ===============================================================
 
-:Copyright: |copy| 2022 Intel Corporation
+:Copyright: © 2022 Intel Corporation
 
 :Author: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
 
diff --git a/Documentation/fault-injection/fault-injection.rst b/Documentation/fault-injection/fault-injection.rst
index 4a25c5eb6f07..eb9c2d9a4f5f 100644
--- a/Documentation/fault-injection/fault-injection.rst
+++ b/Documentation/fault-injection/fault-injection.rst
@@ -132,16 +132,16 @@ configuration of fault-injection capabilities.
 
 	Format: { 'Y' | 'N' }
 
-	default is 'N', setting it to 'Y' won't inject failures into
-	highmem/user allocations.
+	default is 'Y', setting it to 'N' will also inject failures into
+	highmem/user allocations (__GFP_HIGHMEM allocations).
 
 - /sys/kernel/debug/failslab/ignore-gfp-wait:
 - /sys/kernel/debug/fail_page_alloc/ignore-gfp-wait:
 
 	Format: { 'Y' | 'N' }
 
-	default is 'N', setting it to 'Y' will inject failures
-	only into non-sleep allocations (GFP_ATOMIC allocations).
+	default is 'Y', setting it to 'N' will also inject failures
+	into allocations that can sleep (__GFP_DIRECT_RECLAIM allocations).
 
 - /sys/kernel/debug/fail_page_alloc/min-order:
 
@@ -280,7 +280,7 @@ Application Examples
     printf %#x -1 > /sys/kernel/debug/$FAILTYPE/times
     echo 0 > /sys/kernel/debug/$FAILTYPE/space
     echo 2 > /sys/kernel/debug/$FAILTYPE/verbose
-    echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-wait
+    echo Y > /sys/kernel/debug/$FAILTYPE/ignore-gfp-wait
 
     faulty_system()
     {
@@ -334,8 +334,8 @@ Application Examples
     printf %#x -1 > /sys/kernel/debug/$FAILTYPE/times
     echo 0 > /sys/kernel/debug/$FAILTYPE/space
     echo 2 > /sys/kernel/debug/$FAILTYPE/verbose
-    echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-wait
-    echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-highmem
+    echo Y > /sys/kernel/debug/$FAILTYPE/ignore-gfp-wait
+    echo Y > /sys/kernel/debug/$FAILTYPE/ignore-gfp-highmem
     echo 10 > /sys/kernel/debug/$FAILTYPE/stacktrace-depth
 
     trap "echo 0 > /sys/kernel/debug/$FAILTYPE/probability" SIGINT SIGTERM EXIT
diff --git a/Documentation/features/debug/debug-vm-pgtable/arch-support.txt b/Documentation/features/debug/debug-vm-pgtable/arch-support.txt
index 83eafe1a7f68..ff21a83abe62 100644
--- a/Documentation/features/debug/debug-vm-pgtable/arch-support.txt
+++ b/Documentation/features/debug/debug-vm-pgtable/arch-support.txt
@@ -27,5 +27,5 @@
     |       sparc: | TODO |
     |          um: | TODO |
     |         x86: |  ok  |
-    |      xtensa: | TODO |
+    |      xtensa: |  ok  |
     -----------------------
diff --git a/Documentation/features/time/context-tracking/arch-support.txt b/Documentation/features/time/context-tracking/arch-support.txt
index bb1c1801553e..72e7aadeda7e 100644
--- a/Documentation/features/time/context-tracking/arch-support.txt
+++ b/Documentation/features/time/context-tracking/arch-support.txt
@@ -27,5 +27,5 @@
     |       sparc: |  ok  |
     |          um: | TODO |
     |         x86: |  ok  |
-    |      xtensa: | TODO |
+    |      xtensa: |  ok  |
     -----------------------
diff --git a/Documentation/features/time/virt-cpuacct/arch-support.txt b/Documentation/features/time/virt-cpuacct/arch-support.txt
index 5163a60a1c1e..c905aa3c1d81 100644
--- a/Documentation/features/time/virt-cpuacct/arch-support.txt
+++ b/Documentation/features/time/virt-cpuacct/arch-support.txt
@@ -27,5 +27,5 @@
     |       sparc: |  ok  |
     |          um: | TODO |
     |         x86: |  ok  |
-    |      xtensa: | TODO |
+    |      xtensa: |  ok  |
     -----------------------
diff --git a/Documentation/filesystems/caching/cachefiles.rst b/Documentation/filesystems/caching/cachefiles.rst
index 8bf396b76359..fc7abf712315 100644
--- a/Documentation/filesystems/caching/cachefiles.rst
+++ b/Documentation/filesystems/caching/cachefiles.rst
@@ -28,6 +28,7 @@ Cache on Already Mounted Filesystem
 
  (*) Debugging.
 
+ (*) On-demand Read.
 
 
 Overview
@@ -482,3 +483,180 @@ the control file.  For example::
 	echo $((1|4|8)) >/sys/module/cachefiles/parameters/debug
 
 will turn on all function entry debugging.
+
+
+On-demand Read
+==============
+
+When working in its original mode, CacheFiles serves as a local cache for a
+remote networking fs - while in on-demand read mode, CacheFiles can boost the
+scenario where on-demand read semantics are needed, e.g. container image
+distribution.
+
+The essential difference between these two modes is seen when a cache miss
+occurs: In the original mode, the netfs will fetch the data from the remote
+server and then write it to the cache file; in on-demand read mode, fetching
+the data and writing it into the cache is delegated to a user daemon.
+
+``CONFIG_CACHEFILES_ONDEMAND`` should be enabled to support on-demand read mode.
+
+
+Protocol Communication
+----------------------
+
+The on-demand read mode uses a simple protocol for communication between kernel
+and user daemon. The protocol can be modeled as::
+
+	kernel --[request]--> user daemon --[reply]--> kernel
+
+CacheFiles will send requests to the user daemon when needed.  The user daemon
+should poll the devnode ('/dev/cachefiles') to check if there's a pending
+request to be processed.  A POLLIN event will be returned when there's a pending
+request.
+
+The user daemon then reads the devnode to fetch a request to process.  It should
+be noted that each read only gets one request. When it has finished processing
+the request, the user daemon should write the reply to the devnode.
+
+Each request starts with a message header of the form::
+
+	struct cachefiles_msg {
+		__u32 msg_id;
+		__u32 opcode;
+		__u32 len;
+		__u32 object_id;
+		__u8  data[];
+	};
+
+where:
+
+	* ``msg_id`` is a unique ID identifying this request among all pending
+	  requests.
+
+	* ``opcode`` indicates the type of this request.
+
+	* ``object_id`` is a unique ID identifying the cache file operated on.
+
+	* ``data`` indicates the payload of this request.
+
+	* ``len`` indicates the whole length of this request, including the
+	  header and following type-specific payload.
+
+
+Turning on On-demand Mode
+-------------------------
+
+An optional parameter becomes available to the "bind" command::
+
+	bind [ondemand]
+
+When the "bind" command is given no argument, it defaults to the original mode.
+When it is given the "ondemand" argument, i.e. "bind ondemand", on-demand read
+mode will be enabled.
+
+
+The OPEN Request
+----------------
+
+When the netfs opens a cache file for the first time, a request with the
+CACHEFILES_OP_OPEN opcode, a.k.a an OPEN request will be sent to the user
+daemon.  The payload format is of the form::
+
+	struct cachefiles_open {
+		__u32 volume_key_size;
+		__u32 cookie_key_size;
+		__u32 fd;
+		__u32 flags;
+		__u8  data[];
+	};
+
+where:
+
+	* ``data`` contains the volume_key followed directly by the cookie_key.
+	  The volume key is a NUL-terminated string; the cookie key is binary
+	  data.
+
+	* ``volume_key_size`` indicates the size of the volume key in bytes.
+
+	* ``cookie_key_size`` indicates the size of the cookie key in bytes.
+
+	* ``fd`` indicates an anonymous fd referring to the cache file, through
+	  which the user daemon can perform write/llseek file operations on the
+	  cache file.
+
+
+The user daemon can use the given (volume_key, cookie_key) pair to distinguish
+the requested cache file.  With the given anonymous fd, the user daemon can
+fetch the data and write it to the cache file in the background, even when
+kernel has not triggered a cache miss yet.
+
+Be noted that each cache file has a unique object_id, while it may have multiple
+anonymous fds.  The user daemon may duplicate anonymous fds from the initial
+anonymous fd indicated by the @fd field through dup().  Thus each object_id can
+be mapped to multiple anonymous fds, while the usr daemon itself needs to
+maintain the mapping.
+
+When implementing a user daemon, please be careful of RLIMIT_NOFILE,
+``/proc/sys/fs/nr_open`` and ``/proc/sys/fs/file-max``.  Typically these needn't
+be huge since they're related to the number of open device blobs rather than
+open files of each individual filesystem.
+
+The user daemon should reply the OPEN request by issuing a "copen" (complete
+open) command on the devnode::
+
+	copen <msg_id>,<cache_size>
+
+where:
+
+	* ``msg_id`` must match the msg_id field of the OPEN request.
+
+	* When >= 0, ``cache_size`` indicates the size of the cache file;
+	  when < 0, ``cache_size`` indicates any error code encountered by the
+	  user daemon.
+
+
+The CLOSE Request
+-----------------
+
+When a cookie withdrawn, a CLOSE request (opcode CACHEFILES_OP_CLOSE) will be
+sent to the user daemon.  This tells the user daemon to close all anonymous fds
+associated with the given object_id.  The CLOSE request has no extra payload,
+and shouldn't be replied.
+
+
+The READ Request
+----------------
+
+When a cache miss is encountered in on-demand read mode, CacheFiles will send a
+READ request (opcode CACHEFILES_OP_READ) to the user daemon. This tells the user
+daemon to fetch the contents of the requested file range.  The payload is of the
+form::
+
+	struct cachefiles_read {
+		__u64 off;
+		__u64 len;
+	};
+
+where:
+
+	* ``off`` indicates the starting offset of the requested file range.
+
+	* ``len`` indicates the length of the requested file range.
+
+
+When it receives a READ request, the user daemon should fetch the requested data
+and write it to the cache file identified by object_id.
+
+When it has finished processing the READ request, the user daemon should reply
+by using the CACHEFILES_IOC_READ_COMPLETE ioctl on one of the anonymous fds
+associated with the object_id given in the READ request.  The ioctl is of the
+form::
+
+	ioctl(fd, CACHEFILES_IOC_READ_COMPLETE, msg_id);
+
+where:
+
+	* ``fd`` is one of the anonymous fds associated with the object_id
+	  given.
+
+	* ``msg_id`` must match the msg_id field of the READ request.
diff --git a/Documentation/filesystems/caching/netfs-api.rst b/Documentation/filesystems/caching/netfs-api.rst
index 7308d76a29dc..1d18e9def183 100644
--- a/Documentation/filesystems/caching/netfs-api.rst
+++ b/Documentation/filesystems/caching/netfs-api.rst
@@ -433,11 +433,11 @@ has done a write and then the page it wrote from has been released by the VM,
 after which it *has* to look in the cache.
 
 To inform fscache that a page might now be in the cache, the following function
-should be called from the ``releasepage`` address space op::
+should be called from the ``release_folio`` address space op::
 
 	void fscache_note_page_release(struct fscache_cookie *cookie);
 
-if the page has been released (ie. releasepage returned true).
+if the page has been released (ie. release_folio returned true).
 
 Page release and page invalidation should also wait for any mark left on the
 page to say that a DIO write is underway from that page::
diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst
index 6ccd5efb25b7..2e9aaa295125 100644
--- a/Documentation/filesystems/fscrypt.rst
+++ b/Documentation/filesystems/fscrypt.rst
@@ -1256,7 +1256,7 @@ inline encryption hardware will encrypt/decrypt the file contents.
 When inline encryption isn't used, filesystems must encrypt/decrypt
 the file contents themselves, as described below:
 
-For the read path (->readpage()) of regular files, filesystems can
+For the read path (->read_folio()) of regular files, filesystems can
 read the ciphertext into the page cache and decrypt it in-place.  The
 page lock must be held until decryption has finished, to prevent the
 page from becoming visible to userspace prematurely.
diff --git a/Documentation/filesystems/fsverity.rst b/Documentation/filesystems/fsverity.rst
index 8cc536d08f51..756f2c215ba1 100644
--- a/Documentation/filesystems/fsverity.rst
+++ b/Documentation/filesystems/fsverity.rst
@@ -70,12 +70,23 @@ must live on a read-write filesystem because they are independently
 updated and potentially user-installed, so dm-verity cannot be used.
 
 The base fs-verity feature is a hashing mechanism only; actually
-authenticating the files is up to userspace.  However, to meet some
-users' needs, fs-verity optionally supports a simple signature
-verification mechanism where users can configure the kernel to require
-that all fs-verity files be signed by a key loaded into a keyring; see
-`Built-in signature verification`_.  Support for fs-verity file hashes
-in IMA (Integrity Measurement Architecture) policies is also planned.
+authenticating the files may be done by:
+
+* Userspace-only
+
+* Builtin signature verification + userspace policy
+
+  fs-verity optionally supports a simple signature verification
+  mechanism where users can configure the kernel to require that
+  all fs-verity files be signed by a key loaded into a keyring;
+  see `Built-in signature verification`_.
+
+* Integrity Measurement Architecture (IMA)
+
+  IMA supports including fs-verity file digests and signatures in the
+  IMA measurement list and verifying fs-verity based file signatures
+  stored as security.ima xattrs, based on policy.
+
 
 User API
 ========
@@ -548,7 +559,7 @@ already verified).  Below, we describe how filesystems implement this.
 Pagecache
 ~~~~~~~~~
 
-For filesystems using Linux's pagecache, the ``->readpage()`` and
+For filesystems using Linux's pagecache, the ``->read_folio()`` and
 ``->readahead()`` methods must be modified to verify pages before they
 are marked Uptodate.  Merely hooking ``->read_iter()`` would be
 insufficient, since ``->read_iter()`` is not used for memory maps.
@@ -653,12 +664,12 @@ weren't already directly answered in other parts of this document.
     hashed and what to do with those hashes, such as log them,
     authenticate them, or add them to a measurement list.
 
-    IMA is planned to support the fs-verity hashing mechanism as an
-    alternative to doing full file hashes, for people who want the
-    performance and security benefits of the Merkle tree based hash.
-    But it doesn't make sense to force all uses of fs-verity to be
-    through IMA.  As a standalone filesystem feature, fs-verity
-    already meets many users' needs, and it's testable like other
+    IMA supports the fs-verity hashing mechanism as an alternative
+    to full file hashes, for those who want the performance and
+    security benefits of the Merkle tree based hash.  However, it
+    doesn't make sense to force all uses of fs-verity to be through
+    IMA.  fs-verity already meets many users' needs even as a
+    standalone filesystem feature, and it's testable like other
     filesystem features e.g. with xfstests.
 
 :Q: Isn't fs-verity useless because the attacker can just modify the
diff --git a/Documentation/filesystems/idmappings.rst b/Documentation/filesystems/idmappings.rst
index 7a879ec3b6bf..c1db8748389c 100644
--- a/Documentation/filesystems/idmappings.rst
+++ b/Documentation/filesystems/idmappings.rst
@@ -369,6 +369,11 @@ kernel maps the caller's userspace id down into a kernel id according to the
 caller's idmapping and then maps that kernel id up according to the
 filesystem's idmapping.
 
+Let's see some examples with caller/filesystem idmapping but without mount
+idmappings. This will exhibit some problems we can hit. After that we will
+revisit/reconsider these examples, this time using mount idmappings, to see how
+they can solve the problems we observed before.
+
 Example 1
 ~~~~~~~~~
 
diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index c26d854275a0..515bc48ab58b 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -237,20 +237,20 @@ address_space_operations
 prototypes::
 
 	int (*writepage)(struct page *page, struct writeback_control *wbc);
-	int (*readpage)(struct file *, struct page *);
+	int (*read_folio)(struct file *, struct folio *);
 	int (*writepages)(struct address_space *, struct writeback_control *);
 	bool (*dirty_folio)(struct address_space *, struct folio *folio);
 	void (*readahead)(struct readahead_control *);
 	int (*write_begin)(struct file *, struct address_space *mapping,
-				loff_t pos, unsigned len, unsigned flags,
+				loff_t pos, unsigned len,
 				struct page **pagep, void **fsdata);
 	int (*write_end)(struct file *, struct address_space *mapping,
 				loff_t pos, unsigned len, unsigned copied,
 				struct page *page, void *fsdata);
 	sector_t (*bmap)(struct address_space *, sector_t);
 	void (*invalidate_folio) (struct folio *, size_t start, size_t len);
-	int (*releasepage) (struct page *, int);
-	void (*freepage)(struct page *);
+	bool (*release_folio)(struct folio *, gfp_t);
+	void (*free_folio)(struct folio *);
 	int (*direct_IO)(struct kiocb *, struct iov_iter *iter);
 	bool (*isolate_page) (struct page *, isolate_mode_t);
 	int (*migratepage)(struct address_space *, struct page *, struct page *);
@@ -262,22 +262,22 @@ prototypes::
 	int (*swap_deactivate)(struct file *);
 
 locking rules:
-	All except dirty_folio and freepage may block
+	All except dirty_folio and free_folio may block
 
 ======================	======================== =========	===============
-ops			PageLocked(page)	 i_rwsem	invalidate_lock
+ops			folio locked		 i_rwsem	invalidate_lock
 ======================	======================== =========	===============
 writepage:		yes, unlocks (see below)
-readpage:		yes, unlocks				shared
+read_folio:		yes, unlocks				shared
 writepages:
-dirty_folio		maybe
+dirty_folio:		maybe
 readahead:		yes, unlocks				shared
 write_begin:		locks the page		 exclusive
 write_end:		yes, unlocks		 exclusive
 bmap:
 invalidate_folio:	yes					exclusive
-releasepage:		yes
-freepage:		yes
+release_folio:		yes
+free_folio:		yes
 direct_IO:
 isolate_page:		yes
 migratepage:		yes (both)
@@ -289,13 +289,13 @@ swap_activate:		no
 swap_deactivate:	no
 ======================	======================== =========	===============
 
-->write_begin(), ->write_end() and ->readpage() may be called from
+->write_begin(), ->write_end() and ->read_folio() may be called from
 the request handler (/dev/loop).
 
-->readpage() unlocks the page, either synchronously or via I/O
+->read_folio() unlocks the folio, either synchronously or via I/O
 completion.
 
-->readahead() unlocks the pages that I/O is attempted on like ->readpage().
+->readahead() unlocks the folios that I/O is attempted on like ->read_folio().
 
 ->writepage() is used for two purposes: for "memory cleansing" and for
 "sync".  These are quite different operations and the behaviour may differ
@@ -372,12 +372,12 @@ invalidate_lock before invalidating page cache in truncate / hole punch
 path (and thus calling into ->invalidate_folio) to block races between page
 cache invalidation and page cache filling functions (fault, read, ...).
 
-->releasepage() is called when the kernel is about to try to drop the
-buffers from the page in preparation for freeing it.  It returns zero to
-indicate that the buffers are (or may be) freeable.  If ->releasepage is zero,
-the kernel assumes that the fs has no private interest in the buffers.
+->release_folio() is called when the kernel is about to try to drop the
+buffers from the folio in preparation for freeing it.  It returns false to
+indicate that the buffers are (or may be) freeable.  If ->release_folio is
+NULL, the kernel assumes that the fs has no private interest in the buffers.
 
-->freepage() is called when the kernel is done dropping the page
+->free_folio() is called when the kernel has dropped the folio
 from the page cache.
 
 ->launder_folio() may be called prior to releasing a folio if
diff --git a/Documentation/filesystems/netfs_library.rst b/Documentation/filesystems/netfs_library.rst
index 69f00179fdfe..a80a59941d2f 100644
--- a/Documentation/filesystems/netfs_library.rst
+++ b/Documentation/filesystems/netfs_library.rst
@@ -96,7 +96,7 @@ attached to an inode (or NULL if fscache is disabled)::
 Buffered Read Helpers
 =====================
 
-The library provides a set of read helpers that handle the ->readpage(),
+The library provides a set of read helpers that handle the ->read_folio(),
 ->readahead() and much of the ->write_begin() VM operations and translate them
 into a common call framework.
 
@@ -136,20 +136,19 @@ Read Helper Functions
 Three read helpers are provided::
 
 	void netfs_readahead(struct readahead_control *ractl);
-	int netfs_readpage(struct file *file,
-			   struct page *page);
+	int netfs_read_folio(struct file *file,
+			   struct folio *folio);
 	int netfs_write_begin(struct file *file,
 			      struct address_space *mapping,
 			      loff_t pos,
 			      unsigned int len,
-			      unsigned int flags,
 			      struct folio **_folio,
 			      void **_fsdata);
 
 Each corresponds to a VM address space operation.  These operations use the
 state in the per-inode context.
 
-For ->readahead() and ->readpage(), the network filesystem just point directly
+For ->readahead() and ->read_folio(), the network filesystem just point directly
 at the corresponding read helper; whereas for ->write_begin(), it may be a
 little more complicated as the network filesystem might want to flush
 conflicting writes or track dirty data and needs to put the acquired folio if
diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
index 7c1583dbeb59..2e0e4f0e0c6f 100644
--- a/Documentation/filesystems/porting.rst
+++ b/Documentation/filesystems/porting.rst
@@ -624,7 +624,7 @@ any symlink that might use page_follow_link_light/page_put_link() must
 have inode_nohighmem(inode) called before anything might start playing with
 its pagecache.  No highmem pages should end up in the pagecache of such
 symlinks.  That includes any preseeding that might be done during symlink
-creation.  __page_symlink() will honour the mapping gfp flags, so once
+creation.  page_symlink() will honour the mapping gfp flags, so once
 you've done inode_nohighmem() it's safe to use, but if you allocate and
 insert the page manually, make sure to use the right gfp flags.
 
diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
index 061744c436d9..6a0dd99786f9 100644
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -1183,85 +1183,7 @@ Provides counts of softirq handlers serviced since boot time, for each CPU.
     HRTIMER:         0          0          0          0
 	RCU:      1678       1769       2178       2250
 
-
-1.3 IDE devices in /proc/ide
-----------------------------
-
-The subdirectory /proc/ide contains information about all IDE devices of which
-the kernel  is  aware.  There is one subdirectory for each IDE controller, the
-file drivers  and a link for each IDE device, pointing to the device directory
-in the controller specific subtree.
-
-The file 'drivers' contains general information about the drivers used for the
-IDE devices::
-
-  > cat /proc/ide/drivers
-  ide-cdrom version 4.53
-  ide-disk version 1.08
-
-More detailed  information  can  be  found  in  the  controller  specific
-subdirectories. These  are  named  ide0,  ide1  and  so  on.  Each  of  these
-directories contains the files shown in table 1-6.
-
-
-.. table:: Table 1-6: IDE controller info in  /proc/ide/ide?
-
- ======= =======================================
- File    Content
- ======= =======================================
- channel IDE channel (0 or 1)
- config  Configuration (only for PCI/IDE bridge)
- mate    Mate name
- model   Type/Chipset of IDE controller
- ======= =======================================
-
-Each device  connected  to  a  controller  has  a separate subdirectory in the
-controllers directory.  The  files  listed in table 1-7 are contained in these
-directories.
-
-
-.. table:: Table 1-7: IDE device information
-
- ================ ==========================================
- File             Content
- ================ ==========================================
- cache            The cache
- capacity         Capacity of the medium (in 512Byte blocks)
- driver           driver and version
- geometry         physical and logical geometry
- identify         device identify block
- media            media type
- model            device identifier
- settings         device setup
- smart_thresholds IDE disk management thresholds
- smart_values     IDE disk management values
- ================ ==========================================
-
-The most  interesting  file is ``settings``. This file contains a nice
-overview of the drive parameters::
-
-  # cat /proc/ide/ide0/hda/settings
-  name                    value           min             max             mode
-  ----                    -----           ---             ---             ----
-  bios_cyl                526             0               65535           rw
-  bios_head               255             0               255             rw
-  bios_sect               63              0               63              rw
-  breada_readahead        4               0               127             rw
-  bswap                   0               0               1               r
-  file_readahead          72              0               2097151         rw
-  io_32bit                0               0               3               rw
-  keepsettings            0               0               1               rw
-  max_kb_per_request      122             1               127             rw
-  multcount               0               0               8               rw
-  nice1                   1               0               1               rw
-  nowerr                  0               0               1               rw
-  pio_mode                write-only      0               255             w
-  slow                    0               0               1               rw
-  unmaskirq               0               0               1               rw
-  using_dma               0               0               1               rw
-
-
-1.4 Networking info in /proc/net
+1.3 Networking info in /proc/net
 --------------------------------
 
 The subdirectory  /proc/net  follows  the  usual  pattern. Table 1-8 shows the
@@ -1340,7 +1262,7 @@ It will contain information that is specific to that bond, such as the
 current slaves of the bond, the link status of the slaves, and how
 many times the slaves link has failed.
 
-1.5 SCSI info
+1.4 SCSI info
 -------------
 
 If you  have  a  SCSI  host adapter in your system, you'll find a subdirectory
@@ -1403,7 +1325,7 @@ AHA-2940 SCSI adapter::
     Total transfers 0 (0 reads and 0 writes)
 
 
-1.6 Parallel port info in /proc/parport
+1.5 Parallel port info in /proc/parport
 ---------------------------------------
 
 The directory  /proc/parport  contains information about the parallel ports of
@@ -1428,7 +1350,7 @@ These directories contain the four files shown in Table 1-10.
            number or none).
  ========= ====================================================================
 
-1.7 TTY info in /proc/tty
+1.6 TTY info in /proc/tty
 -------------------------
 
 Information about  the  available  and actually used tty's can be found in the
@@ -1463,7 +1385,7 @@ To see  which  tty's  are  currently in use, you can simply look into the file
   unknown              /dev/tty        4    1-63 console
 
 
-1.8 Miscellaneous kernel statistics in /proc/stat
+1.7 Miscellaneous kernel statistics in /proc/stat
 -------------------------------------------------
 
 Various pieces   of  information about  kernel activity  are  available in the
@@ -1536,7 +1458,7 @@ softirqs serviced; each subsequent column is the total for that particular
 softirq.
 
 
-1.9 Ext4 file system parameters
+1.8 Ext4 file system parameters
 -------------------------------
 
 Information about mounted ext4 file systems can be found in
@@ -1552,7 +1474,7 @@ in Table 1-12, below.
  mb_groups       details of multiblock allocator buddy cache of free blocks
  ==============  ==========================================================
 
-1.10 /proc/consoles
+1.9 /proc/consoles
 -------------------
 Shows registered system console lines.
 
diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
index 794bd1a66bfb..12a011d2cbc6 100644
--- a/Documentation/filesystems/vfs.rst
+++ b/Documentation/filesystems/vfs.rst
@@ -620,9 +620,9 @@ Writeback.
 The first can be used independently to the others.  The VM can try to
 either write dirty pages in order to clean them, or release clean pages
 in order to reuse them.  To do this it can call the ->writepage method
-on dirty pages, and ->releasepage on clean pages with PagePrivate set.
-Clean pages without PagePrivate and with no external references will be
-released without notice being given to the address_space.
+on dirty pages, and ->release_folio on clean folios with the private
+flag set.  Clean pages without PagePrivate and with no external references
+will be released without notice being given to the address_space.
 
 To achieve this functionality, pages need to be placed on an LRU with
 lru_cache_add and mark_page_active needs to be called whenever the page
@@ -656,7 +656,7 @@ by memory-mapping the page.  Data is written into the address space by
 the application, and then written-back to storage typically in whole
 pages, however the address_space has finer control of write sizes.
 
-The read process essentially only requires 'readpage'.  The write
+The read process essentially only requires 'read_folio'.  The write
 process is more complicated and uses write_begin/write_end or
 dirty_folio to write data into the address_space, and writepage and
 writepages to writeback data to storage.
@@ -722,20 +722,20 @@ cache in your filesystem.  The following members are defined:
 
 	struct address_space_operations {
 		int (*writepage)(struct page *page, struct writeback_control *wbc);
-		int (*readpage)(struct file *, struct page *);
+		int (*read_folio)(struct file *, struct folio *);
 		int (*writepages)(struct address_space *, struct writeback_control *);
 		bool (*dirty_folio)(struct address_space *, struct folio *);
 		void (*readahead)(struct readahead_control *);
 		int (*write_begin)(struct file *, struct address_space *mapping,
-				   loff_t pos, unsigned len, unsigned flags,
+				   loff_t pos, unsigned len,
 				struct page **pagep, void **fsdata);
 		int (*write_end)(struct file *, struct address_space *mapping,
 				 loff_t pos, unsigned len, unsigned copied,
 				 struct page *page, void *fsdata);
 		sector_t (*bmap)(struct address_space *, sector_t);
 		void (*invalidate_folio) (struct folio *, size_t start, size_t len);
-		int (*releasepage) (struct page *, int);
-		void (*freepage)(struct page *);
+		bool (*release_folio)(struct folio *, gfp_t);
+		void (*free_folio)(struct folio *);
 		ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter);
 		/* isolate a page for migration */
 		bool (*isolate_page) (struct page *, isolate_mode_t);
@@ -747,7 +747,7 @@ cache in your filesystem.  The following members are defined:
 
 		bool (*is_partially_uptodate) (struct folio *, size_t from,
 					       size_t count);
-		void (*is_dirty_writeback) (struct page *, bool *, bool *);
+		void (*is_dirty_writeback)(struct folio *, bool *, bool *);
 		int (*error_remove_page) (struct mapping *mapping, struct page *page);
 		int (*swap_activate)(struct file *);
 		int (*swap_deactivate)(struct file *);
@@ -772,14 +772,14 @@ cache in your filesystem.  The following members are defined:
 
 	See the file "Locking" for more details.
 
-``readpage``
-	called by the VM to read a page from backing store.  The page
-	will be Locked when readpage is called, and should be unlocked
-	and marked uptodate once the read completes.  If ->readpage
-	discovers that it needs to unlock the page for some reason, it
-	can do so, and then return AOP_TRUNCATED_PAGE.  In this case,
-	the page will be relocated, relocked and if that all succeeds,
-	->readpage will be called again.
+``read_folio``
+	called by the VM to read a folio from backing store.  The folio
+	will be locked when read_folio is called, and should be unlocked
+	and marked uptodate once the read completes.  If ->read_folio
+	discovers that it cannot perform the I/O at this time, it can
+        unlock the folio and return AOP_TRUNCATED_PAGE.  In this case,
+	the folio will be looked up again, relocked and if that all succeeds,
+	->read_folio will be called again.
 
 ``writepages``
 	called by the VM to write out pages associated with the
@@ -832,9 +832,6 @@ cache in your filesystem.  The following members are defined:
 	passed to write_begin is greater than the number of bytes copied
 	into the page).
 
-	flags is a field for AOP_FLAG_xxx flags, described in
-	include/linux/fs.h.
-
 	A void * may be returned in fsdata, which then gets passed into
 	write_end.
 
@@ -867,36 +864,35 @@ cache in your filesystem.  The following members are defined:
 	address space.  This generally corresponds to either a
 	truncation, punch hole or a complete invalidation of the address
 	space (in the latter case 'offset' will always be 0 and 'length'
-	will be folio_size()).  Any private data associated with the page
+	will be folio_size()).  Any private data associated with the folio
 	should be updated to reflect this truncation.  If offset is 0
 	and length is folio_size(), then the private data should be
-	released, because the page must be able to be completely
-	discarded.  This may be done by calling the ->releasepage
+	released, because the folio must be able to be completely
+	discarded.  This may be done by calling the ->release_folio
 	function, but in this case the release MUST succeed.
 
-``releasepage``
-	releasepage is called on PagePrivate pages to indicate that the
-	page should be freed if possible.  ->releasepage should remove
-	any private data from the page and clear the PagePrivate flag.
-	If releasepage() fails for some reason, it must indicate failure
-	with a 0 return value.  releasepage() is used in two distinct
-	though related cases.  The first is when the VM finds a clean
-	page with no active users and wants to make it a free page.  If
-	->releasepage succeeds, the page will be removed from the
-	address_space and become free.
+``release_folio``
+	release_folio is called on folios with private data to tell the
+	filesystem that the folio is about to be freed.  ->release_folio
+	should remove any private data from the folio and clear the
+	private flag.  If release_folio() fails, it should return false.
+	release_folio() is used in two distinct though related cases.
+	The first is when the VM wants to free a clean folio with no
+	active users.  If ->release_folio succeeds, the folio will be
+	removed from the address_space and be freed.
 
 	The second case is when a request has been made to invalidate
-	some or all pages in an address_space.  This can happen through
-	the fadvise(POSIX_FADV_DONTNEED) system call or by the
-	filesystem explicitly requesting it as nfs and 9fs do (when they
+	some or all folios in an address_space.  This can happen
+	through the fadvise(POSIX_FADV_DONTNEED) system call or by the
+	filesystem explicitly requesting it as nfs and 9p do (when they
 	believe the cache may be out of date with storage) by calling
 	invalidate_inode_pages2().  If the filesystem makes such a call,
-	and needs to be certain that all pages are invalidated, then its
-	releasepage will need to ensure this.  Possibly it can clear the
-	PageUptodate bit if it cannot free private data yet.
+	and needs to be certain that all folios are invalidated, then
+	its release_folio will need to ensure this.  Possibly it can
+	clear the uptodate flag if it cannot free private data yet.
 
-``freepage``
-	freepage is called once the page is no longer visible in the
+``free_folio``
+	free_folio is called once the folio is no longer visible in the
 	page cache in order to allow the cleanup of any private data.
 	Since it may be called by the memory reclaimer, it should not
 	assume that the original address_space mapping still exists, and
@@ -935,14 +931,14 @@ cache in your filesystem.  The following members are defined:
 	without needing I/O to bring the whole page up to date.
 
 ``is_dirty_writeback``
-	Called by the VM when attempting to reclaim a page.  The VM uses
+	Called by the VM when attempting to reclaim a folio.  The VM uses
 	dirty and writeback information to determine if it needs to
 	stall to allow flushers a chance to complete some IO.
-	Ordinarily it can use PageDirty and PageWriteback but some
-	filesystems have more complex state (unstable pages in NFS
+	Ordinarily it can use folio_test_dirty and folio_test_writeback but
+	some filesystems have more complex state (unstable folios in NFS
 	prevent reclaim) or do not set those flags due to locking
 	problems.  This callback allows a filesystem to indicate to the
-	VM if a page should be treated as dirty or writeback for the
+	VM if a folio should be treated as dirty or writeback for the
 	purposes of stalling.
 
 ``error_remove_page``
diff --git a/Documentation/filesystems/zonefs.rst b/Documentation/filesystems/zonefs.rst
index 6b213fe9a33e..394b9f15dce0 100644
--- a/Documentation/filesystems/zonefs.rst
+++ b/Documentation/filesystems/zonefs.rst
@@ -306,8 +306,15 @@ Further notes:
 Mount options
 -------------
 
-zonefs define the "errors=<behavior>" mount option to allow the user to specify
-zonefs behavior in response to I/O errors, inode size inconsistencies or zone
+zonefs defines several mount options:
+* errors=<behavior>
+* explicit-open
+
+"errors=<behavior>" option
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The "errors=<behavior>" option mount option allows the user to specify zonefs
+behavior in response to I/O errors, inode size inconsistencies or zone
 condition changes. The defined behaviors are as follow:
 
 * remount-ro (default)
@@ -326,6 +333,9 @@ discover the amount of data that has been written to the zone. In the case of a
 read-only zone discovered at run-time, as indicated in the previous section.
 The size of the zone file is left unchanged from its last updated value.
 
+"explicit-open" option
+~~~~~~~~~~~~~~~~~~~~~~
+
 A zoned block device (e.g. an NVMe Zoned Namespace device) may have limits on
 the number of zones that can be active, that is, zones that are in the
 implicit open, explicit open or closed conditions.  This potential limitation
@@ -341,6 +351,44 @@ guaranteed that write requests can be processed. Conversely, the
 to the device on the last close() of a zone file if the zone is not full nor
 empty.
 
+Runtime sysfs attributes
+------------------------
+
+zonefs defines several sysfs attributes for mounted devices.  All attributes
+are user readable and can be found in the directory /sys/fs/zonefs/<dev>/,
+where <dev> is the name of the mounted zoned block device.
+
+The attributes defined are as follows.
+
+* **max_wro_seq_files**:  This attribute reports the maximum number of
+  sequential zone files that can be open for writing.  This number corresponds
+  to the maximum number of explicitly or implicitly open zones that the device
+  supports.  A value of 0 means that the device has no limit and that any zone
+  (any file) can be open for writing and written at any time, regardless of the
+  state of other zones.  When the *explicit-open* mount option is used, zonefs
+  will fail any open() system call requesting to open a sequential zone file for
+  writing when the number of sequential zone files already open for writing has
+  reached the *max_wro_seq_files* limit.
+* **nr_wro_seq_files**:  This attribute reports the current number of sequential
+  zone files open for writing.  When the "explicit-open" mount option is used,
+  this number can never exceed *max_wro_seq_files*.  If the *explicit-open*
+  mount option is not used, the reported number can be greater than
+  *max_wro_seq_files*.  In such case, it is the responsibility of the
+  application to not write simultaneously more than *max_wro_seq_files*
+  sequential zone files.  Failure to do so can result in write errors.
+* **max_active_seq_files**:  This attribute reports the maximum number of
+  sequential zone files that are in an active state, that is, sequential zone
+  files that are partially writen (not empty nor full) or that have a zone that
+  is explicitly open (which happens only if the *explicit-open* mount option is
+  used).  This number is always equal to the maximum number of active zones that
+  the device supports.  A value of 0 means that the mounted device has no limit
+  on the number of sequential zone files that can be active.
+* **nr_active_seq_files**:  This attributes reports the current number of
+  sequential zone files that are active. If *max_active_seq_files* is not 0,
+  then the value of *nr_active_seq_files* can never exceed the value of
+  *nr_active_seq_files*, regardless of the use of the *explicit-open* mount
+  option.
+
 Zonefs User Space Tools
 =======================
 
diff --git a/Documentation/firmware-guide/acpi/enumeration.rst b/Documentation/firmware-guide/acpi/enumeration.rst
index 47fb4d6d4557..6b62425ef9cd 100644
--- a/Documentation/firmware-guide/acpi/enumeration.rst
+++ b/Documentation/firmware-guide/acpi/enumeration.rst
@@ -167,8 +167,7 @@ The table below shows an example of its usage::
         Name (_DSD, Package () {
             ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
             Package () {
-                Package () {"interrupt-names",
-                Package (2) {"default", "alert"}},
+                Package () { "interrupt-names", Package () { "default", "alert" } },
             }
         ...
         })
diff --git a/Documentation/hwmon/aquacomputer_d5next.rst b/Documentation/hwmon/aquacomputer_d5next.rst
index 3373e27b707d..717e28226cde 100644
--- a/Documentation/hwmon/aquacomputer_d5next.rst
+++ b/Documentation/hwmon/aquacomputer_d5next.rst
@@ -6,7 +6,9 @@ Kernel driver aquacomputer-d5next
 Supported devices:
 
 * Aquacomputer D5 Next watercooling pump
+* Aquacomputer Farbwerk RGB controller
 * Aquacomputer Farbwerk 360 RGB controller
+* Aquacomputer Octo fan controller
 
 Author: Aleksa Savic
 
@@ -28,7 +30,10 @@ seems to require sending it a complete configuration. That includes addressable
 RGB LEDs, for which there is no standard sysfs interface. Thus, that task is
 better suited for userspace tools.
 
-The Farbwerk 360 exposes four temperature sensors. Depending on the device,
+The Octo exposes four temperature sensors and eight PWM controllable fans, along
+with their speed (in RPM), power, voltage and current.
+
+The Farbwerk and Farbwerk 360 expose four temperature sensors. Depending on the device,
 not all sysfs and debugfs entries will be available.
 
 Usage notes
diff --git a/Documentation/hwmon/asus_ec_sensors.rst b/Documentation/hwmon/asus_ec_sensors.rst
index e7e8f1640f45..78ca69eda877 100644
--- a/Documentation/hwmon/asus_ec_sensors.rst
+++ b/Documentation/hwmon/asus_ec_sensors.rst
@@ -4,17 +4,20 @@ Kernel driver asus_ec_sensors
 =================================
 
 Supported boards:
- * PRIME X570-PRO,
- * Pro WS X570-ACE,
- * ROG CROSSHAIR VIII DARK HERO,
+ * PRIME X470-PRO
+ * PRIME X570-PRO
+ * Pro WS X570-ACE
+ * ProArt X570-CREATOR WIFI
+ * ROG CROSSHAIR VIII DARK HERO
  * ROG CROSSHAIR VIII HERO (WI-FI)
- * ROG CROSSHAIR VIII FORMULA,
- * ROG CROSSHAIR VIII HERO,
- * ROG CROSSHAIR VIII IMPACT,
- * ROG STRIX B550-E GAMING,
- * ROG STRIX B550-I GAMING,
- * ROG STRIX X570-E GAMING,
- * ROG STRIX X570-F GAMING,
+ * ROG CROSSHAIR VIII FORMULA
+ * ROG CROSSHAIR VIII HERO
+ * ROG CROSSHAIR VIII IMPACT
+ * ROG STRIX B550-E GAMING
+ * ROG STRIX B550-I GAMING
+ * ROG STRIX X570-E GAMING
+ * ROG STRIX X570-E GAMING WIFI II
+ * ROG STRIX X570-F GAMING
  * ROG STRIX X570-I GAMING
 
 Authors:
@@ -52,3 +55,5 @@ Module Parameters
 		the path is mostly identical for them). If ASUS changes this path
 		in a future BIOS update, this parameter can be used to override
 		the stored in the driver value until it gets updated.
+		A special string ":GLOBAL_LOCK" can be passed to use the ACPI
+		global lock instead of a dedicated mutex.
diff --git a/Documentation/hwmon/dell-smm-hwmon.rst b/Documentation/hwmon/dell-smm-hwmon.rst
index d3323a96665d..e5d85e40972c 100644
--- a/Documentation/hwmon/dell-smm-hwmon.rst
+++ b/Documentation/hwmon/dell-smm-hwmon.rst
@@ -86,6 +86,13 @@ probe the BIOS on your machine and discover the appropriate codes.
 
 Again, when you find new codes, we'd be happy to have your patches!
 
+``thermal`` interface
+---------------------------
+
+The driver also exports the fans as thermal cooling devices with
+``type`` set to ``dell-smm-fan[1-3]``. This allows for easy fan control
+using one of the thermal governors.
+
 Module parameters
 -----------------
 
@@ -324,6 +331,8 @@ Reading of fan types causes erratic fan behaviour.      Studio XPS 8000
 
                                                         Inspiron 580
 
+                                                        Inspiron 3505
+
 Fan-related SMM calls take too long (about 500ms).      Inspiron 7720
 
                                                         Vostro 3360
diff --git a/Documentation/hwmon/hwmon-kernel-api.rst b/Documentation/hwmon/hwmon-kernel-api.rst
index c41eb6108103..f3276b3a381a 100644
--- a/Documentation/hwmon/hwmon-kernel-api.rst
+++ b/Documentation/hwmon/hwmon-kernel-api.rst
@@ -50,6 +50,10 @@ register/unregister functions::
 
   void devm_hwmon_device_unregister(struct device *dev);
 
+  char *hwmon_sanitize_name(const char *name);
+
+  char *devm_hwmon_sanitize_name(struct device *dev, const char *name);
+
 hwmon_device_register_with_groups registers a hardware monitoring device.
 The first parameter of this function is a pointer to the parent device.
 The name parameter is a pointer to the hwmon device name. The registration
@@ -72,7 +76,7 @@ hwmon_device_register_with_info is the most comprehensive and preferred means
 to register a hardware monitoring device. It creates the standard sysfs
 attributes in the hardware monitoring core, letting the driver focus on reading
 from and writing to the chip instead of having to bother with sysfs attributes.
-The parent device parameter cannot be NULL with non-NULL chip info. Its
+The parent device parameter as well as the chip parameter must not be NULL. Its
 parameters are described in more detail below.
 
 devm_hwmon_device_register_with_info is similar to
@@ -95,6 +99,18 @@ All supported hwmon device registration functions only accept valid device
 names. Device names including invalid characters (whitespace, '*', or '-')
 will be rejected. The 'name' parameter is mandatory.
 
+If the driver doesn't use a static device name (for example it uses
+dev_name()), and therefore cannot make sure the name only contains valid
+characters, hwmon_sanitize_name can be used. This convenience function
+will duplicate the string and replace any invalid characters with an
+underscore. It will allocate memory for the new string and it is the
+responsibility of the caller to release the memory when the device is
+removed.
+
+devm_hwmon_sanitize_name is the resource managed version of
+hwmon_sanitize_name; the memory will be freed automatically on device
+removal.
+
 Using devm_hwmon_device_register_with_info()
 --------------------------------------------
 
diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
index 863b76289159..a72c16872ec2 100644
--- a/Documentation/hwmon/index.rst
+++ b/Documentation/hwmon/index.rst
@@ -90,6 +90,7 @@ Hardware Monitoring Kernel Drivers
    jc42
    k10temp
    k8temp
+   lan966x
    lineage-pem
    lm25066
    lm63
@@ -223,6 +224,7 @@ Hardware Monitoring Kernel Drivers
    wm8350
    xgene-hwmon
    xdpe12284
+   xdpe152c4
    zl6100
 
 .. only::  subproject and html
diff --git a/Documentation/hwmon/lan966x.rst b/Documentation/hwmon/lan966x.rst
new file mode 100644
index 000000000000..1d1724afa5d2
--- /dev/null
+++ b/Documentation/hwmon/lan966x.rst
@@ -0,0 +1,40 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Kernel driver lan966x-hwmon
+===========================
+
+Supported chips:
+
+  * Microchip LAN9668 (sensor in SoC)
+
+    Prefix: 'lan9668-hwmon'
+
+    Datasheet: https://microchip-ung.github.io/lan9668_reginfo
+
+Authors:
+
+	Michael Walle <michael@walle.cc>
+
+Description
+-----------
+
+This driver implements support for the Microchip LAN9668 on-chip
+temperature sensor as well as its fan controller. It provides one
+temperature sensor and one fan controller. The temperature range
+of the sensor is specified from -40 to +125 degrees Celsius and
+its accuracy is +/- 5 degrees Celsius. The fan controller has a
+tacho input and a PWM output with a customizable PWM output
+frequency ranging from ~20Hz to ~650kHz.
+
+No alarms are supported by the SoC.
+
+The driver exports temperature values, fan tacho input and PWM
+settings via the following sysfs files:
+
+**temp1_input**
+
+**fan1_input**
+
+**pwm1**
+
+**pwm1_freq**
diff --git a/Documentation/hwmon/max16601.rst b/Documentation/hwmon/max16601.rst
index 92c0a7d7808c..6a4eef8efbaf 100644
--- a/Documentation/hwmon/max16601.rst
+++ b/Documentation/hwmon/max16601.rst
@@ -21,6 +21,14 @@ Supported chips:
 
     Datasheet: Not published
 
+  * Maxim MAX16602
+
+    Prefix: 'max16602'
+
+    Addresses scanned: -
+
+    Datasheet: https://datasheets.maximintegrated.com/en/ds/MAX16602.pdf
+
 Author: Guenter Roeck <linux@roeck-us.net>
 
 
diff --git a/Documentation/hwmon/xdpe152c4.rst b/Documentation/hwmon/xdpe152c4.rst
new file mode 100644
index 000000000000..ab92c32d4d69
--- /dev/null
+++ b/Documentation/hwmon/xdpe152c4.rst
@@ -0,0 +1,118 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Kernel driver xdpe152
+=====================
+
+Supported chips:
+
+  * Infineon XDPE152C4
+
+    Prefix: 'xdpe152c4'
+
+  * Infineon XDPE15284
+
+    Prefix: 'xdpe15284'
+
+Authors:
+
+    Greg Schwendimann <greg.schwendimann@infineon.com>
+
+Description
+-----------
+
+This driver implements support for Infineon Digital Multi-phase Controller
+XDPE152C4 and XDPE15284 dual loop voltage regulators.
+The devices are compliant with:
+
+- Intel VR13, VR13HC and VR14 rev 1.86
+  converter specification.
+- Intel SVID rev 1.93. protocol.
+- PMBus rev 1.3.1 interface.
+
+Devices support linear format for reading input and output voltage, input
+and output current, input and output power and temperature.
+
+Devices support two pages for telemetry.
+
+The driver provides for current: input, maximum and critical thresholds
+and maximum and critical alarms. Low Critical thresholds and Low critical alarm are
+supported only for current output.
+The driver exports the following attributes for via the sysfs files, where
+indexes 1, 2 are for "iin" and 3, 4 for "iout":
+
+**curr[1-4]_crit**
+
+**curr[1-4]_crit_alarm**
+
+**curr[1-4]_input**
+
+**curr[1-4]_label**
+
+**curr[1-4]_max**
+
+**curr[1-4]_max_alarm**
+
+**curr[3-4]_lcrit**
+
+**curr[3-4]_lcrit_alarm**
+
+**curr[3-4]_rated_max**
+
+The driver provides for voltage: input, critical and low critical thresholds
+and critical and low critical alarms.
+The driver exports the following attributes for via the sysfs files, where
+indexes 1, 2 are for "vin" and 3, 4 for "vout":
+
+**in[1-4]_min**
+
+**in[1-4]_crit**
+
+**in[1-4_crit_alarm**
+
+**in[1-4]_input**
+
+**in[1-4]_label**
+
+**in[1-4]_max**
+
+**in[1-4]_max_alarm**
+
+**in[1-4]_min**
+
+**in[1-4]_min_alarm**
+
+**in[3-4]_lcrit**
+
+**in[3-4]_lcrit_alarm**
+
+**in[3-4]_rated_max**
+
+**in[3-4]_rated_min**
+
+The driver provides for power: input and alarms.
+The driver exports the following attributes for via the sysfs files, where
+indexes 1, 2 are for "pin" and 3, 4 for "pout":
+
+**power[1-2]_alarm**
+
+**power[1-4]_input**
+
+**power[1-4]_label**
+
+**power[1-4]_max**
+
+**power[1-4]_rated_max**
+
+The driver provides for temperature: input, maximum and critical thresholds
+and maximum and critical alarms.
+The driver exports the following attributes for via the sysfs files:
+
+**temp[1-2]_crit**
+
+**temp[1-2]_crit_alarm**
+
+**temp[1-2]_input**
+
+**temp[1-2]_max**
+
+**temp[1-2]_max_alarm**
diff --git a/Documentation/ide/ChangeLog.ide-cd.1994-2004 b/Documentation/ide/ChangeLog.ide-cd.1994-2004
deleted file mode 100644
index 4cc3ad99f39b..000000000000
--- a/Documentation/ide/ChangeLog.ide-cd.1994-2004
+++ /dev/null
@@ -1,268 +0,0 @@
-/*
- * 1.00  Oct 31, 1994 -- Initial version.
- * 1.01  Nov  2, 1994 -- Fixed problem with starting request in
- *                       cdrom_check_status.
- * 1.03  Nov 25, 1994 -- leaving unmask_intr[] as a user-setting (as for disks)
- * (from mlord)       -- minor changes to cdrom_setup()
- *                    -- renamed ide_dev_s to ide_drive_t, enable irq on command
- * 2.00  Nov 27, 1994 -- Generalize packet command interface;
- *                       add audio ioctls.
- * 2.01  Dec  3, 1994 -- Rework packet command interface to handle devices
- *                       which send an interrupt when ready for a command.
- * 2.02  Dec 11, 1994 -- Cache the TOC in the driver.
- *                       Don't use SCMD_PLAYAUDIO_TI; it's not included
- *                       in the current version of ATAPI.
- *                       Try to use LBA instead of track or MSF addressing
- *                       when possible.
- *                       Don't wait for READY_STAT.
- * 2.03  Jan 10, 1995 -- Rewrite block read routines to handle block sizes
- *                       other than 2k and to move multiple sectors in a
- *                       single transaction.
- * 2.04  Apr 21, 1995 -- Add work-around for Creative Labs CD220E drives.
- *                       Thanks to Nick Saw <cwsaw@pts7.pts.mot.com> for
- *                       help in figuring this out.  Ditto for Acer and
- *                       Aztech drives, which seem to have the same problem.
- * 2.04b May 30, 1995 -- Fix to match changes in ide.c version 3.16 -ml
- * 2.05  Jun  8, 1995 -- Don't attempt to retry after an illegal request
- *                        or data protect error.
- *                       Use HWIF and DEV_HWIF macros as in ide.c.
- *                       Always try to do a request_sense after
- *                        a failed command.
- *                       Include an option to give textual descriptions
- *                        of ATAPI errors.
- *                       Fix a bug in handling the sector cache which
- *                        showed up if the drive returned data in 512 byte
- *                        blocks (like Pioneer drives).  Thanks to
- *                        Richard Hirst <srh@gpt.co.uk> for diagnosing this.
- *                       Properly supply the page number field in the
- *                        MODE_SELECT command.
- *                       PLAYAUDIO12 is broken on the Aztech; work around it.
- * 2.05x Aug 11, 1995 -- lots of data structure renaming/restructuring in ide.c
- *                       (my apologies to Scott, but now ide-cd.c is independent)
- * 3.00  Aug 22, 1995 -- Implement CDROMMULTISESSION ioctl.
- *                       Implement CDROMREADAUDIO ioctl (UNTESTED).
- *                       Use input_ide_data() and output_ide_data().
- *                       Add door locking.
- *                       Fix usage count leak in cdrom_open, which happened
- *                        when a read-write mount was attempted.
- *                       Try to load the disk on open.
- *                       Implement CDROMEJECT_SW ioctl (off by default).
- *                       Read total cdrom capacity during open.
- *                       Rearrange logic in cdrom_decode_status.  Issue
- *                        request sense commands for failed packet commands
- *                        from here instead of from cdrom_queue_packet_command.
- *                        Fix a race condition in retrieving error information.
- *                       Suppress printing normal unit attention errors and
- *                        some drive not ready errors.
- *                       Implement CDROMVOLREAD ioctl.
- *                       Implement CDROMREADMODE1/2 ioctls.
- *                       Fix race condition in setting up interrupt handlers
- *                        when the `serialize' option is used.
- * 3.01  Sep  2, 1995 -- Fix ordering of reenabling interrupts in
- *                        cdrom_queue_request.
- *                       Another try at using ide_[input,output]_data.
- * 3.02  Sep 16, 1995 -- Stick total disk capacity in partition table as well.
- *                       Make VERBOSE_IDE_CD_ERRORS dump failed command again.
- *                       Dump out more information for ILLEGAL REQUEST errs.
- *                       Fix handling of errors occurring before the
- *                        packet command is transferred.
- *                       Fix transfers with odd bytelengths.
- * 3.03  Oct 27, 1995 -- Some Creative drives have an id of just `CD'.
- *                       `DCI-2S10' drives are broken too.
- * 3.04  Nov 20, 1995 -- So are Vertos drives.
- * 3.05  Dec  1, 1995 -- Changes to go with overhaul of ide.c and ide-tape.c
- * 3.06  Dec 16, 1995 -- Add support needed for partitions.
- *                       More workarounds for Vertos bugs (based on patches
- *                        from Holger Dietze <dietze@aix520.informatik.uni-leipzig.de>).
- *                       Try to eliminate byteorder assumptions.
- *                       Use atapi_cdrom_subchnl struct definition.
- *                       Add STANDARD_ATAPI compilation option.
- * 3.07  Jan 29, 1996 -- More twiddling for broken drives: Sony 55D,
- *                        Vertos 300.
- *                       Add NO_DOOR_LOCKING configuration option.
- *                       Handle drive_cmd requests w/NULL args (for hdparm -t).
- *                       Work around sporadic Sony55e audio play problem.
- * 3.07a Feb 11, 1996 -- check drive->id for NULL before dereferencing, to fix
- *                        problem with "hde=cdrom" with no drive present.  -ml
- * 3.08  Mar  6, 1996 -- More Vertos workarounds.
- * 3.09  Apr  5, 1996 -- Add CDROMCLOSETRAY ioctl.
- *                       Switch to using MSF addressing for audio commands.
- *                       Reformat to match kernel tabbing style.
- *                       Add CDROM_GET_UPC ioctl.
- * 3.10  Apr 10, 1996 -- Fix compilation error with STANDARD_ATAPI.
- * 3.11  Apr 29, 1996 -- Patch from Heiko Eißfeldt <heiko@colossus.escape.de>
- *                       to remove redundant verify_area calls.
- * 3.12  May  7, 1996 -- Rudimentary changer support.  Based on patches
- *                        from Gerhard Zuber <zuber@berlin.snafu.de>.
- *                       Let open succeed even if there's no loaded disc.
- * 3.13  May 19, 1996 -- Fixes for changer code.
- * 3.14  May 29, 1996 -- Add work-around for Vertos 600.
- *                        (From Hennus Bergman <hennus@sky.ow.nl>.)
- * 3.15  July 2, 1996 -- Added support for Sanyo 3 CD changers
- *                        from Ben Galliart <bgallia@luc.edu> with
- *                        special help from Jeff Lightfoot
- *                        <jeffml@pobox.com>
- * 3.15a July 9, 1996 -- Improved Sanyo 3 CD changer identification
- * 3.16  Jul 28, 1996 -- Fix from Gadi to reduce kernel stack usage for ioctl.
- * 3.17  Sep 17, 1996 -- Tweak audio reads for some drives.
- *                       Start changing CDROMLOADFROMSLOT to CDROM_SELECT_DISC.
- * 3.18  Oct 31, 1996 -- Added module and DMA support.
- *
- * 4.00  Nov 5, 1996   -- New ide-cd maintainer,
- *                                 Erik B. Andersen <andersee@debian.org>
- *                     -- Newer Creative drives don't always set the error
- *                          register correctly.  Make sure we see media changes
- *                          regardless.
- *                     -- Integrate with generic cdrom driver.
- *                     -- CDROMGETSPINDOWN and CDROMSETSPINDOWN ioctls, based on
- *                          a patch from Ciro Cattuto <>.
- *                     -- Call set_device_ro.
- *                     -- Implement CDROMMECHANISMSTATUS and CDROMSLOTTABLE
- *                          ioctls, based on patch by Erik Andersen
- *                     -- Add some probes of drive capability during setup.
- *
- * 4.01  Nov 11, 1996  -- Split into ide-cd.c and ide-cd.h
- *                     -- Removed CDROMMECHANISMSTATUS and CDROMSLOTTABLE
- *                          ioctls in favor of a generalized approach
- *                          using the generic cdrom driver.
- *                     -- Fully integrated with the 2.1.X kernel.
- *                     -- Other stuff that I forgot (lots of changes)
- *
- * 4.02  Dec 01, 1996  -- Applied patch from Gadi Oxman <gadio@netvision.net.il>
- *                          to fix the drive door locking problems.
- *
- * 4.03  Dec 04, 1996  -- Added DSC overlap support.
- * 4.04  Dec 29, 1996  -- Added CDROMREADRAW ioclt based on patch
- *                          by Ales Makarov (xmakarov@sun.felk.cvut.cz)
- *
- * 4.05  Nov 20, 1997  -- Modified to print more drive info on init
- *                        Minor other changes
- *                        Fix errors on CDROMSTOP (If you have a "Dolphin",
- *                          you must define IHAVEADOLPHIN)
- *                        Added identifier so new Sanyo CD-changer works
- *                        Better detection if door locking isn't supported
- *
- * 4.06  Dec 17, 1997  -- fixed endless "tray open" messages  -ml
- * 4.07  Dec 17, 1997  -- fallback to set pc->stat on "tray open"
- * 4.08  Dec 18, 1997  -- spew less noise when tray is empty
- *                     -- fix speed display for ACER 24X, 18X
- * 4.09  Jan 04, 1998  -- fix handling of the last block so we return
- *                         an end of file instead of an I/O error (Gadi)
- * 4.10  Jan 24, 1998  -- fixed a bug so now changers can change to a new
- *                         slot when there is no disc in the current slot.
- *                     -- Fixed a memory leak where info->changer_info was
- *                         malloc'ed but never free'd when closing the device.
- *                     -- Cleaned up the global namespace a bit by making more
- *                         functions static that should already have been.
- * 4.11  Mar 12, 1998  -- Added support for the CDROM_SELECT_SPEED ioctl
- *                         based on a patch for 2.0.33 by Jelle Foks
- *                         <jelle@scintilla.utwente.nl>, a patch for 2.0.33
- *                         by Toni Giorgino <toni@pcape2.pi.infn.it>, the SCSI
- *                         version, and my own efforts.  -erik
- *                     -- Fixed a stupid bug which egcs was kind enough to
- *                         inform me of where "Illegal mode for this track"
- *                         was never returned due to a comparison on data
- *                         types of limited range.
- * 4.12  Mar 29, 1998  -- Fixed bug in CDROM_SELECT_SPEED so write speed is
- *                         now set ionly for CD-R and CD-RW drives.  I had
- *                         removed this support because it produced errors.
- *                         It produced errors _only_ for non-writers. duh.
- * 4.13  May 05, 1998  -- Suppress useless "in progress of becoming ready"
- *                         messages, since this is not an error.
- *                     -- Change error messages to be const
- *                     -- Remove a "\t" which looks ugly in the syslogs
- * 4.14  July 17, 1998 -- Change to pointing to .ps version of ATAPI spec
- *                         since the .pdf version doesn't seem to work...
- *                     -- Updated the TODO list to something more current.
- *
- * 4.15  Aug 25, 1998  -- Updated ide-cd.h to respect machine endianness,
- *                         patch thanks to "Eddie C. Dost" <ecd@skynet.be>
- *
- * 4.50  Oct 19, 1998  -- New maintainers!
- *                         Jens Axboe <axboe@image.dk>
- *                         Chris Zwilling <chris@cloudnet.com>
- *
- * 4.51  Dec 23, 1998  -- Jens Axboe <axboe@image.dk>
- *                      - ide_cdrom_reset enabled since the ide subsystem
- *                         handles resets fine now. <axboe@image.dk>
- *                      - Transfer size fix for Samsung CD-ROMs, thanks to
- *                        "Ville Hallik" <ville.hallik@mail.ee>.
- *                      - other minor stuff.
- *
- * 4.52  Jan 19, 1999  -- Jens Axboe <axboe@image.dk>
- *                      - Detect DVD-ROM/RAM drives
- *
- * 4.53  Feb 22, 1999   - Include other model Samsung and one Goldstar
- *                         drive in transfer size limit.
- *                      - Fix the I/O error when doing eject without a medium
- *                         loaded on some drives.
- *                      - CDROMREADMODE2 is now implemented through
- *                         CDROMREADRAW, since many drives don't support
- *                         MODE2 (even though ATAPI 2.6 says they must).
- *                      - Added ignore parameter to ide-cd (as a module), eg
- *                         	insmod ide-cd ignore='hda hdb'
- *                         Useful when using ide-cd in conjunction with
- *                         ide-scsi. TODO: non-modular way of doing the
- *                         same.
- *
- * 4.54  Aug 5, 1999	- Support for MMC2 class commands through the generic
- *			  packet interface to cdrom.c.
- *			- Unified audio ioctl support, most of it.
- *			- cleaned up various deprecated verify_area().
- *			- Added ide_cdrom_packet() as the interface for
- *			  the Uniform generic_packet().
- *			- bunch of other stuff, will fill in logs later.
- *			- report 1 slot for non-changers, like the other
- *			  cd-rom drivers. don't report select disc for
- *			  non-changers as well.
- *			- mask out audio playing, if the device can't do it.
- *
- * 4.55  Sep 1, 1999	- Eliminated the rest of the audio ioctls, except
- *			  for CDROMREADTOC[ENTRY|HEADER]. Some of the drivers
- *			  use this independently of the actual audio handling.
- *			  They will disappear later when I get the time to
- *			  do it cleanly.
- *			- Minimize the TOC reading - only do it when we
- *			  know a media change has occurred.
- *			- Moved all the CDROMREADx ioctls to the Uniform layer.
- *			- Heiko Eißfeldt <heiko@colossus.escape.de> supplied
- *			  some fixes for CDI.
- *			- CD-ROM leaving door locked fix from Andries
- *			  Brouwer <Andries.Brouwer@cwi.nl>
- *			- Erik Andersen <andersen@xmission.com> unified
- *			  commands across the various drivers and how
- *			  sense errors are handled.
- *
- * 4.56  Sep 12, 1999	- Removed changer support - it is now in the
- *			  Uniform layer.
- *			- Added partition based multisession handling.
- *			- Mode sense and mode select moved to the
- *			  Uniform layer.
- *			- Fixed a problem with WPI CDS-32X drive - it
- *			  failed the capabilities
- *
- * 4.57  Apr 7, 2000	- Fixed sense reporting.
- *			- Fixed possible oops in ide_cdrom_get_last_session()
- *			- Fix locking mania and make ide_cdrom_reset relock
- *			- Stop spewing errors to log when magicdev polls with
- *			  TEST_UNIT_READY on some drives.
- *			- Various fixes from Tobias Ringstrom:
- *			  tray if it was locked prior to the reset.
- *			  - cdrom_read_capacity returns one frame too little.
- *			  - Fix real capacity reporting.
- *
- * 4.58  May 1, 2000	- Clean up ACER50 stuff.
- *			- Fix small problem with ide_cdrom_capacity
- *
- * 4.59  Aug 11, 2000	- Fix changer problem in cdrom_read_toc, we weren't
- *			  correctly sensing a disc change.
- *			- Rearranged some code
- *			- Use extended sense on drives that support it for
- *			  correctly reporting tray status -- from
- *			  Michael D Johnson <johnsom@orst.edu>
- * 4.60  Dec 17, 2003	- Add mt rainier support
- *			- Bump timeout for packet commands, matches sr
- *			- Odd stuff
- * 4.61  Jan 22, 2004	- support hardware sector sizes other than 2kB,
- *			  Pascal Schmidt <der.eremit@email.de>
- */
diff --git a/Documentation/ide/ChangeLog.ide-floppy.1996-2002 b/Documentation/ide/ChangeLog.ide-floppy.1996-2002
deleted file mode 100644
index 46c19ef32a9e..000000000000
--- a/Documentation/ide/ChangeLog.ide-floppy.1996-2002
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Many thanks to Lode Leroy <Lode.Leroy@www.ibase.be>, who tested so many
- * ALPHA patches to this driver on an EASYSTOR LS-120 ATAPI floppy drive.
- *
- * Ver 0.1   Oct 17 96   Initial test version, mostly based on ide-tape.c.
- * Ver 0.2   Oct 31 96   Minor changes.
- * Ver 0.3   Dec  2 96   Fixed error recovery bug.
- * Ver 0.4   Jan 26 97   Add support for the HDIO_GETGEO ioctl.
- * Ver 0.5   Feb 21 97   Add partitions support.
- *                       Use the minimum of the LBA and CHS capacities.
- *                       Avoid hwgroup->rq == NULL on the last irq.
- *                       Fix potential null dereferencing with DEBUG_LOG.
- * Ver 0.8   Dec  7 97   Increase irq timeout from 10 to 50 seconds.
- *                       Add media write-protect detection.
- *                       Issue START command only if TEST UNIT READY fails.
- *                       Add work-around for IOMEGA ZIP revision 21.D.
- *                       Remove idefloppy_get_capabilities().
- * Ver 0.9   Jul  4 99   Fix a bug which might have caused the number of
- *                        bytes requested on each interrupt to be zero.
- *                        Thanks to <shanos@es.co.nz> for pointing this out.
- * Ver 0.9.sv Jan 6 01   Sam Varshavchik <mrsam@courier-mta.com>
- *                       Implement low level formatting.  Reimplemented
- *                       IDEFLOPPY_CAPABILITIES_PAGE, since we need the srfp
- *                       bit.  My LS-120 drive barfs on
- *                       IDEFLOPPY_CAPABILITIES_PAGE, but maybe it's just me.
- *                       Compromise by not reporting a failure to get this
- *                       mode page.  Implemented four IOCTLs in order to
- *                       implement formatting.  IOCTls begin with 0x4600,
- *                       0x46 is 'F' as in Format.
- *            Jan 9 01   Userland option to select format verify.
- *                       Added PC_SUPPRESS_ERROR flag - some idefloppy drives
- *                       do not implement IDEFLOPPY_CAPABILITIES_PAGE, and
- *                       return a sense error.  Suppress error reporting in
- *                       this particular case in order to avoid spurious
- *                       errors in syslog.  The culprit is
- *                       idefloppy_get_capability_page(), so move it to
- *                       idefloppy_begin_format() so that it's not used
- *                       unless absolutely necessary.
- *                       If drive does not support format progress indication
- *                       monitor the dsc bit in the status register.
- *                       Also, O_NDELAY on open will allow the device to be
- *                       opened without a disk available.  This can be used to
- *                       open an unformatted disk, or get the device capacity.
- * Ver 0.91  Dec 11 99   Added IOMEGA Clik! drive support by
- *     		   <paul@paulbristow.net>
- * Ver 0.92  Oct 22 00   Paul Bristow became official maintainer for this
- *           		   driver.  Included Powerbook internal zip kludge.
- * Ver 0.93  Oct 24 00   Fixed bugs for Clik! drive
- *                        no disk on insert and disk change now works
- * Ver 0.94  Oct 27 00   Tidied up to remove strstr(Clik) everywhere
- * Ver 0.95  Nov  7 00   Brought across to kernel 2.4
- * Ver 0.96  Jan  7 01   Actually in line with release version of 2.4.0
- *                       including set_bit patch from Rusty Russell
- * Ver 0.97  Jul 22 01   Merge 0.91-0.96 onto 0.9.sv for ac series
- * Ver 0.97.sv Aug 3 01  Backported from 2.4.7-ac3
- * Ver 0.98  Oct 26 01   Split idefloppy_transfer_pc into two pieces to
- *                        fix a lost interrupt problem. It appears the busy
- *                        bit was being deasserted by my IOMEGA ATAPI ZIP 100
- *                        drive before the drive was actually ready.
- * Ver 0.98a Oct 29 01   Expose delay value so we can play.
- * Ver 0.99  Feb 24 02   Remove duplicate code, modify clik! detection code
- *                        to support new PocketZip drives
- */
diff --git a/Documentation/ide/ChangeLog.ide-tape.1995-2002 b/Documentation/ide/ChangeLog.ide-tape.1995-2002
deleted file mode 100644
index 877fac8770b3..000000000000
--- a/Documentation/ide/ChangeLog.ide-tape.1995-2002
+++ /dev/null
@@ -1,257 +0,0 @@
-/*
- * Ver 0.1   Nov  1 95   Pre-working code :-)
- * Ver 0.2   Nov 23 95   A short backup (few megabytes) and restore procedure
- *                        was successful ! (Using tar cvf ... on the block
- *                        device interface).
- *                       A longer backup resulted in major swapping, bad
- *                        overall Linux performance and eventually failed as
- *                        we received non serial read-ahead requests from the
- *                        buffer cache.
- * Ver 0.3   Nov 28 95   Long backups are now possible, thanks to the
- *                        character device interface. Linux's responsiveness
- *                        and performance doesn't seem to be much affected
- *                        from the background backup procedure.
- *                       Some general mtio.h magnetic tape operations are
- *                        now supported by our character device. As a result,
- *                        popular tape utilities are starting to work with
- *                        ide tapes :-)
- *                       The following configurations were tested:
- *                       1. An IDE ATAPI TAPE shares the same interface
- *                        and irq with an IDE ATAPI CDROM.
- *                       2. An IDE ATAPI TAPE shares the same interface
- *                        and irq with a normal IDE disk.
- *                        Both configurations seemed to work just fine !
- *                        However, to be on the safe side, it is meanwhile
- *                        recommended to give the IDE TAPE its own interface
- *                        and irq.
- *                       The one thing which needs to be done here is to
- *                        add a "request postpone" feature to ide.c,
- *                        so that we won't have to wait for the tape to finish
- *                        performing a long media access (DSC) request (such
- *                        as a rewind) before we can access the other device
- *                        on the same interface. This effect doesn't disturb
- *                        normal operation most of the time because read/write
- *                        requests are relatively fast, and once we are
- *                        performing one tape r/w request, a lot of requests
- *                        from the other device can be queued and ide.c will
- *			  service all of them after this single tape request.
- * Ver 1.0   Dec 11 95   Integrated into Linux 1.3.46 development tree.
- *                       On each read / write request, we now ask the drive
- *                        if we can transfer a constant number of bytes
- *                        (a parameter of the drive) only to its buffers,
- *                        without causing actual media access. If we can't,
- *                        we just wait until we can by polling the DSC bit.
- *                        This ensures that while we are not transferring
- *                        more bytes than the constant referred to above, the
- *                        interrupt latency will not become too high and
- *                        we won't cause an interrupt timeout, as happened
- *                        occasionally in the previous version.
- *                       While polling for DSC, the current request is
- *                        postponed and ide.c is free to handle requests from
- *                        the other device. This is handled transparently to
- *                        ide.c. The hwgroup locking method which was used
- *                        in the previous version was removed.
- *                       Use of new general features which are provided by
- *                        ide.c for use with atapi devices.
- *                        (Programming done by Mark Lord)
- *                       Few potential bug fixes (Again, suggested by Mark)
- *                       Single character device data transfers are now
- *                        not limited in size, as they were before.
- *                       We are asking the tape about its recommended
- *                        transfer unit and send a larger data transfer
- *                        as several transfers of the above size.
- *                        For best results, use an integral number of this
- *                        basic unit (which is shown during driver
- *                        initialization). I will soon add an ioctl to get
- *                        this important parameter.
- *                       Our data transfer buffer is allocated on startup,
- *                        rather than before each data transfer. This should
- *                        ensure that we will indeed have a data buffer.
- * Ver 1.1   Dec 14 95   Fixed random problems which occurred when the tape
- *                        shared an interface with another device.
- *                        (poll_for_dsc was a complete mess).
- *                       Removed some old (non-active) code which had
- *                        to do with supporting buffer cache originated
- *                        requests.
- *                       The block device interface can now be opened, so
- *                        that general ide driver features like the unmask
- *                        interrupts flag can be selected with an ioctl.
- *                        This is the only use of the block device interface.
- *                       New fast pipelined operation mode (currently only on
- *                        writes). When using the pipelined mode, the
- *                        throughput can potentially reach the maximum
- *                        tape supported throughput, regardless of the
- *                        user backup program. On my tape drive, it sometimes
- *                        boosted performance by a factor of 2. Pipelined
- *                        mode is enabled by default, but since it has a few
- *                        downfalls as well, you may want to disable it.
- *                        A short explanation of the pipelined operation mode
- *                        is available below.
- * Ver 1.2   Jan  1 96   Eliminated pipelined mode race condition.
- *                       Added pipeline read mode. As a result, restores
- *                        are now as fast as backups.
- *                       Optimized shared interface behavior. The new behavior
- *                        typically results in better IDE bus efficiency and
- *                        higher tape throughput.
- *                       Pre-calculation of the expected read/write request
- *                        service time, based on the tape's parameters. In
- *                        the pipelined operation mode, this allows us to
- *                        adjust our polling frequency to a much lower value,
- *                        and thus to dramatically reduce our load on Linux,
- *                        without any decrease in performance.
- *                       Implemented additional mtio.h operations.
- *                       The recommended user block size is returned by
- *                        the MTIOCGET ioctl.
- *                       Additional minor changes.
- * Ver 1.3   Feb  9 96   Fixed pipelined read mode bug which prevented the
- *                        use of some block sizes during a restore procedure.
- *                       The character device interface will now present a
- *                        continuous view of the media - any mix of block sizes
- *                        during a backup/restore procedure is supported. The
- *                        driver will buffer the requests internally and
- *                        convert them to the tape's recommended transfer
- *                        unit, making performance almost independent of the
- *                        chosen user block size.
- *                       Some improvements in error recovery.
- *                       By cooperating with ide-dma.c, bus mastering DMA can
- *                        now sometimes be used with IDE tape drives as well.
- *                        Bus mastering DMA has the potential to dramatically
- *                        reduce the CPU's overhead when accessing the device,
- *                        and can be enabled by using hdparm -d1 on the tape's
- *                        block device interface. For more info, read the
- *                        comments in ide-dma.c.
- * Ver 1.4   Mar 13 96   Fixed serialize support.
- * Ver 1.5   Apr 12 96   Fixed shared interface operation, broken in 1.3.85.
- *                       Fixed pipelined read mode inefficiency.
- *                       Fixed nasty null dereferencing bug.
- * Ver 1.6   Aug 16 96   Fixed FPU usage in the driver.
- *                       Fixed end of media bug.
- * Ver 1.7   Sep 10 96   Minor changes for the CONNER CTT8000-A model.
- * Ver 1.8   Sep 26 96   Attempt to find a better balance between good
- *                        interactive response and high system throughput.
- * Ver 1.9   Nov  5 96   Automatically cross encountered filemarks rather
- *                        than requiring an explicit FSF command.
- *                       Abort pending requests at end of media.
- *                       MTTELL was sometimes returning incorrect results.
- *                       Return the real block size in the MTIOCGET ioctl.
- *                       Some error recovery bug fixes.
- * Ver 1.10  Nov  5 96   Major reorganization.
- *                       Reduced CPU overhead a bit by eliminating internal
- *                        bounce buffers.
- *                       Added module support.
- *                       Added multiple tape drives support.
- *                       Added partition support.
- *                       Rewrote DSC handling.
- *                       Some portability fixes.
- *                       Removed ide-tape.h.
- *                       Additional minor changes.
- * Ver 1.11  Dec  2 96   Bug fix in previous DSC timeout handling.
- *                       Use ide_stall_queue() for DSC overlap.
- *                       Use the maximum speed rather than the current speed
- *                        to compute the request service time.
- * Ver 1.12  Dec  7 97   Fix random memory overwriting and/or last block data
- *                        corruption, which could occur if the total number
- *                        of bytes written to the tape was not an integral
- *                        number of tape blocks.
- *                       Add support for INTERRUPT DRQ devices.
- * Ver 1.13  Jan  2 98   Add "speed == 0" work-around for HP COLORADO 5GB
- * Ver 1.14  Dec 30 98   Partial fixes for the Sony/AIWA tape drives.
- *                       Replace cli()/sti() with hwgroup spinlocks.
- * Ver 1.15  Mar 25 99   Fix SMP race condition by replacing hwgroup
- *                        spinlock with private per-tape spinlock.
- * Ver 1.16  Sep  1 99   Add OnStream tape support.
- *                       Abort read pipeline on EOD.
- *                       Wait for the tape to become ready in case it returns
- *                        "in the process of becoming ready" on open().
- *                       Fix zero padding of the last written block in
- *                        case the tape block size is larger than PAGE_SIZE.
- *                       Decrease the default disconnection time to tn.
- * Ver 1.16e Oct  3 99   Minor fixes.
- * Ver 1.16e1 Oct 13 99  Patches by Arnold Niessen,
- *                          niessen@iae.nl / arnold.niessen@philips.com
- *                   GO-1)  Undefined code in idetape_read_position
- *				according to Gadi's email
- *                   AJN-1) Minor fix asc == 11 should be asc == 0x11
- *                               in idetape_issue_packet_command (did effect
- *                               debugging output only)
- *                   AJN-2) Added more debugging output, and
- *                              added ide-tape: where missing. I would also
- *				like to add tape->name where possible
- *                   AJN-3) Added different debug_level's
- *                              via /proc/ide/hdc/settings
- *				"debug_level" determines amount of debugging output;
- *				can be changed using /proc/ide/hdx/settings
- *				0 : almost no debugging output
- *				1 : 0+output errors only
- *				2 : 1+output all sensekey/asc
- *				3 : 2+follow all chrdev related procedures
- *				4 : 3+follow all procedures
- *				5 : 4+include pc_stack rq_stack info
- *				6 : 5+USE_COUNT updates
- *                   AJN-4) Fixed timeout for retension in idetape_queue_pc_tail
- *				from 5 to 10 minutes
- *                   AJN-5) Changed maximum number of blocks to skip when
- *                              reading tapes with multiple consecutive write
- *                              errors from 100 to 1000 in idetape_get_logical_blk
- *                   Proposed changes to code:
- *                   1) output "logical_blk_num" via /proc
- *                   2) output "current_operation" via /proc
- *                   3) Either solve or document the fact that `mt rewind' is
- *                      required after reading from /dev/nhtx to be
- *			able to rmmod the idetape module;
- *			Also, sometimes an application finishes but the
- *			device remains `busy' for some time. Same cause ?
- *                   Proposed changes to release-notes:
- *		     4) write a simple `quickstart' section in the
- *                      release notes; I volunteer if you don't want to
- *		     5) include a pointer to video4linux in the doc
- *                      to stimulate video applications
- *                   6) release notes lines 331 and 362: explain what happens
- *			if the application data rate is higher than 1100 KB/s;
- *			similar approach to lower-than-500 kB/s ?
- *		     7) 6.6 Comparison; wouldn't it be better to allow different
- *			strategies for read and write ?
- *			Wouldn't it be better to control the tape buffer
- *			contents instead of the bandwidth ?
- *		     8) line 536: replace will by would (if I understand
- *			this section correctly, a hypothetical and unwanted situation
- *			 is being described)
- * Ver 1.16f Dec 15 99   Change place of the secondary OnStream header frames.
- * Ver 1.17  Nov 2000 / Jan 2001  Marcel Mol, marcel@mesa.nl
- *			- Add idetape_onstream_mode_sense_tape_parameter_page
- *			  function to get tape capacity in frames: tape->capacity.
- *			- Add support for DI-50 drives( or any DI- drive).
- *			- 'workaround' for read error/blank block around block 3000.
- *			- Implement Early warning for end of media for Onstream.
- *			- Cosmetic code changes for readability.
- *			- Idetape_position_tape should not use SKIP bit during
- *			  Onstream read recovery.
- *			- Add capacity, logical_blk_num and first/last_frame_position
- *			  to /proc/ide/hd?/settings.
- *			- Module use count was gone in the Linux 2.4 driver.
- * Ver 1.17a Apr 2001 Willem Riede osst@riede.org
- *			- Get drive's actual block size from mode sense block descriptor
- *			- Limit size of pipeline
- * Ver 1.17b Oct 2002   Alan Stern <stern@rowland.harvard.edu>
- *			Changed IDETAPE_MIN_PIPELINE_STAGES to 1 and actually used
- *			 it in the code!
- *			Actually removed aborted stages in idetape_abort_pipeline
- *			 instead of just changing the command code.
- *			Made the transfer byte count for Request Sense equal to the
- *			 actual length of the data transfer.
- *			Changed handling of partial data transfers: they do not
- *			 cause DMA errors.
- *			Moved initiation of DMA transfers to the correct place.
- *			Removed reference to unallocated memory.
- *			Made __idetape_discard_read_pipeline return the number of
- *			 sectors skipped, not the number of stages.
- *			Replaced errant kfree() calls with __idetape_kfree_stage().
- *			Fixed off-by-one error in testing the pipeline length.
- *			Fixed handling of filemarks in the read pipeline.
- *			Small code optimization for MTBSF and MTBSFM ioctls.
- *			Don't try to unlock the door during device close if is
- *			 already unlocked!
- *			Cosmetic fixes to miscellaneous debugging output messages.
- *			Set the minimum /proc/ide/hd?/settings values for "pipeline",
- *			 "pipeline_min", and "pipeline_max" to 1.
- */
diff --git a/Documentation/ide/changelogs.rst b/Documentation/ide/changelogs.rst
deleted file mode 100644
index fdf9d0fb8027..000000000000
--- a/Documentation/ide/changelogs.rst
+++ /dev/null
@@ -1,17 +0,0 @@
-Changelog for ide cd
---------------------
-
- .. include:: ChangeLog.ide-cd.1994-2004
-    :literal:
-
-Changelog for ide floppy
-------------------------
-
- .. include:: ChangeLog.ide-floppy.1996-2002
-    :literal:
-
-Changelog for ide tape
-----------------------
-
- .. include:: ChangeLog.ide-tape.1995-2002
-    :literal:
diff --git a/Documentation/ide/ide-tape.rst b/Documentation/ide/ide-tape.rst
deleted file mode 100644
index 3e061d9c0e38..000000000000
--- a/Documentation/ide/ide-tape.rst
+++ /dev/null
@@ -1,68 +0,0 @@
-===============================
-IDE ATAPI streaming tape driver
-===============================
-
-This driver is a part of the Linux ide driver.
-
-The driver, in co-operation with ide.c, basically traverses the
-request-list for the block device interface. The character device
-interface, on the other hand, creates new requests, adds them
-to the request-list of the block device, and waits for their completion.
-
-The block device major and minor numbers are determined from the
-tape's relative position in the ide interfaces, as explained in ide.c.
-
-The character device interface consists of the following devices::
-
-  ht0		major 37, minor 0	first  IDE tape, rewind on close.
-  ht1		major 37, minor 1	second IDE tape, rewind on close.
-  ...
-  nht0		major 37, minor 128	first  IDE tape, no rewind on close.
-  nht1		major 37, minor 129	second IDE tape, no rewind on close.
-  ...
-
-The general magnetic tape commands compatible interface, as defined by
-include/linux/mtio.h, is accessible through the character device.
-
-General ide driver configuration options, such as the interrupt-unmask
-flag, can be configured by issuing an ioctl to the block device interface,
-as any other ide device.
-
-Our own ide-tape ioctl's can be issued to either the block device or
-the character device interface.
-
-Maximal throughput with minimal bus load will usually be achieved in the
-following scenario:
-
-     1.	ide-tape is operating in the pipelined operation mode.
-     2.	No buffering is performed by the user backup program.
-
-Testing was done with a 2 GB CONNER CTMA 4000 IDE ATAPI Streaming Tape Drive.
-
-Here are some words from the first releases of hd.c, which are quoted
-in ide.c and apply here as well:
-
-* Special care is recommended.  Have Fun!
-
-Possible improvements
-=====================
-
-1. Support for the ATAPI overlap protocol.
-
-In order to maximize bus throughput, we currently use the DSC
-overlap method which enables ide.c to service requests from the
-other device while the tape is busy executing a command. The
-DSC overlap method involves polling the tape's status register
-for the DSC bit, and servicing the other device while the tape
-isn't ready.
-
-In the current QIC development standard (December 1995),
-it is recommended that new tape drives will *in addition*
-implement the ATAPI overlap protocol, which is used for the
-same purpose - efficient use of the IDE bus, but is interrupt
-driven and thus has much less CPU overhead.
-
-ATAPI overlap is likely to be supported in most new ATAPI
-devices, including new ATAPI cdroms, and thus provides us
-a method by which we can achieve higher throughput when
-sharing a (fast) ATA-2 disk with any (slow) new ATAPI device.
diff --git a/Documentation/ide/ide.rst b/Documentation/ide/ide.rst
deleted file mode 100644
index 88bdcba92f7d..000000000000
--- a/Documentation/ide/ide.rst
+++ /dev/null
@@ -1,265 +0,0 @@
-============================================
-Information regarding the Enhanced IDE drive
-============================================
-
-   The hdparm utility can be used to control various IDE features on a
-   running system. It is packaged separately.  Please Look for it on popular
-   linux FTP sites.
-
--------------------------------------------------------------------------------
-
-.. important::
-
-   BUGGY IDE CHIPSETS CAN CORRUPT DATA!!
-
-    PCI versions of the CMD640 and RZ1000 interfaces are now detected
-    automatically at startup when PCI BIOS support is configured.
-
-    Linux disables the "prefetch" ("readahead") mode of the RZ1000
-    to prevent data corruption possible due to hardware design flaws.
-
-    For the CMD640, linux disables "IRQ unmasking" (hdparm -u1) on any
-    drive for which the "prefetch" mode of the CMD640 is turned on.
-    If "prefetch" is disabled (hdparm -p8), then "IRQ unmasking" can be
-    used again.
-
-    For the CMD640, linux disables "32bit I/O" (hdparm -c1) on any drive
-    for which the "prefetch" mode of the CMD640 is turned off.
-    If "prefetch" is enabled (hdparm -p9), then "32bit I/O" can be
-    used again.
-
-    The CMD640 is also used on some Vesa Local Bus (VLB) cards, and is *NOT*
-    automatically detected by Linux.  For safe, reliable operation with such
-    interfaces, one *MUST* use the "cmd640.probe_vlb" kernel option.
-
-    Use of the "serialize" option is no longer necessary.
-
--------------------------------------------------------------------------------
-
-Common pitfalls
-===============
-
-- 40-conductor IDE cables are capable of transferring data in DMA modes up to
-  udma2, but no faster.
-
-- If possible devices should be attached to separate channels if they are
-  available. Typically the disk on the first and CD-ROM on the second.
-
-- If you mix devices on the same cable, please consider using similar devices
-  in respect of the data transfer mode they support.
-
-- Even better try to stick to the same vendor and device type on the same
-  cable.
-
-This is the multiple IDE interface driver, as evolved from hd.c
-===============================================================
-
-It supports up to 9 IDE interfaces per default, on one or more IRQs (usually
-14 & 15).  There can be up to two drives per interface, as per the ATA-6 spec.::
-
-  Primary:    ide0, port 0x1f0; major=3;  hda is minor=0; hdb is minor=64
-  Secondary:  ide1, port 0x170; major=22; hdc is minor=0; hdd is minor=64
-  Tertiary:   ide2, port 0x1e8; major=33; hde is minor=0; hdf is minor=64
-  Quaternary: ide3, port 0x168; major=34; hdg is minor=0; hdh is minor=64
-  fifth..     ide4, usually PCI, probed
-  sixth..     ide5, usually PCI, probed
-
-To access devices on interfaces > ide0, device entries please make sure that
-device files for them are present in /dev.  If not, please create such
-entries, by using /dev/MAKEDEV.
-
-This driver automatically probes for most IDE interfaces (including all PCI
-ones), for the drives/geometries attached to those interfaces, and for the IRQ
-lines being used by the interfaces (normally 14, 15 for ide0/ide1).
-
-Any number of interfaces may share a single IRQ if necessary, at a slight
-performance penalty, whether on separate cards or a single VLB card.
-The IDE driver automatically detects and handles this.  However, this may
-or may not be harmful to your hardware.. two or more cards driving the same IRQ
-can potentially burn each other's bus driver, though in practice this
-seldom occurs.  Be careful, and if in doubt, don't do it!
-
-Drives are normally found by auto-probing and/or examining the CMOS/BIOS data.
-For really weird situations, the apparent (fdisk) geometry can also be specified
-on the kernel "command line" using LILO.  The format of such lines is::
-
-	ide_core.chs=[interface_number.device_number]:cyls,heads,sects
-
-or::
-
-	ide_core.cdrom=[interface_number.device_number]
-
-For example::
-
-	ide_core.chs=1.0:1050,32,64  ide_core.cdrom=1.1
-
-The results of successful auto-probing may override the physical geometry/irq
-specified, though the "original" geometry may be retained as the "logical"
-geometry for partitioning purposes (fdisk).
-
-If the auto-probing during boot time confuses a drive (ie. the drive works
-with hd.c but not with ide.c), then an command line option may be specified
-for each drive for which you'd like the drive to skip the hardware
-probe/identification sequence.  For example::
-
-	ide_core.noprobe=0.1
-
-or::
-
-	ide_core.chs=1.0:768,16,32
-	ide_core.noprobe=1.0
-
-Note that when only one IDE device is attached to an interface, it should be
-jumpered as "single" or "master", *not* "slave".  Many folks have had
-"trouble" with cdroms because of this requirement, so the driver now probes
-for both units, though success is more likely when the drive is jumpered
-correctly.
-
-Courtesy of Scott Snyder and others, the driver supports ATAPI cdrom drives
-such as the NEC-260 and the new MITSUMI triple/quad speed drives.
-Such drives will be identified at boot time, just like a hard disk.
-
-If for some reason your cdrom drive is *not* found at boot time, you can force
-the probe to look harder by supplying a kernel command line parameter
-via LILO, such as:::
-
-	ide_core.cdrom=1.0	/* "master" on second interface (hdc) */
-
-or::
-
-	ide_core.cdrom=1.1	/* "slave" on second interface (hdd) */
-
-For example, a GW2000 system might have a hard drive on the primary
-interface (/dev/hda) and an IDE cdrom drive on the secondary interface
-(/dev/hdc).  To mount a CD in the cdrom drive, one would use something like::
-
-	ln -sf /dev/hdc /dev/cdrom
-	mkdir /mnt/cdrom
-	mount /dev/cdrom /mnt/cdrom -t iso9660 -o ro
-
-If, after doing all of the above, mount doesn't work and you see
-errors from the driver (with dmesg) complaining about `status=0xff`,
-this means that the hardware is not responding to the driver's attempts
-to read it.  One of the following is probably the problem:
-
-  - Your hardware is broken.
-
-  - You are using the wrong address for the device, or you have the
-    drive jumpered wrong.  Review the configuration instructions above.
-
-  - Your IDE controller requires some nonstandard initialization sequence
-    before it will work properly.  If this is the case, there will often
-    be a separate MS-DOS driver just for the controller.  IDE interfaces
-    on sound cards usually fall into this category.  Such configurations
-    can often be made to work by first booting MS-DOS, loading the
-    appropriate drivers, and then warm-booting linux (without powering
-    off).  This can be automated using loadlin in the MS-DOS autoexec.
-
-If you always get timeout errors, interrupts from the drive are probably
-not making it to the host.  Check how you have the hardware jumpered
-and make sure it matches what the driver expects (see the configuration
-instructions above).  If you have a PCI system, also check the BIOS
-setup; I've had one report of a system which was shipped with IRQ 15
-disabled by the BIOS.
-
-The kernel is able to execute binaries directly off of the cdrom,
-provided it is mounted with the default block size of 1024 (as above).
-
-Please pass on any feedback on any of this stuff to the maintainer,
-whose address can be found in linux/MAINTAINERS.
-
-The IDE driver is modularized.  The high level disk/CD-ROM/tape/floppy
-drivers can always be compiled as loadable modules, the chipset drivers
-can only be compiled into the kernel, and the core code (ide.c) can be
-compiled as a loadable module provided no chipset support is needed.
-
-When using ide.c as a module in combination with kmod, add::
-
-	alias block-major-3 ide-probe
-
-to a configuration file in /etc/modprobe.d/.
-
-When ide.c is used as a module, you can pass command line parameters to the
-driver using the "options=" keyword to insmod, while replacing any ',' with
-';'.
-
-
-Summary of ide driver parameters for kernel command line
-========================================================
-
-For legacy IDE VLB host drivers (ali14xx/dtc2278/ht6560b/qd65xx/umc8672)
-you need to explicitly enable probing by using "probe" kernel parameter,
-i.e. to enable probing for ALI M14xx chipsets (ali14xx host driver) use:
-
-* "ali14xx.probe" boot option when ali14xx driver is built-in the kernel
-
-* "probe" module parameter when ali14xx driver is compiled as module
-  ("modprobe ali14xx probe")
-
-Also for legacy CMD640 host driver (cmd640) you need to use "probe_vlb"
-kernel paremeter to enable probing for VLB version of the chipset (PCI ones
-are detected automatically).
-
-You also need to use "probe" kernel parameter for ide-4drives driver
-(support for IDE generic chipset with four drives on one port).
-
-To enable support for IDE doublers on Amiga use "doubler" kernel parameter
-for gayle host driver (i.e. "gayle.doubler" if the driver is built-in).
-
-To force ignoring cable detection (this should be needed only if you're using
-short 40-wires cable which cannot be automatically detected - if this is not
-a case please report it as a bug instead) use "ignore_cable" kernel parameter:
-
-* "ide_core.ignore_cable=[interface_number]" boot option if IDE is built-in
-  (i.e. "ide_core.ignore_cable=1" to force ignoring cable for "ide1")
-
-* "ignore_cable=[interface_number]" module parameter (for ide_core module)
-  if IDE is compiled as module
-
-Other kernel parameters for ide_core are:
-
-* "nodma=[interface_number.device_number]" to disallow DMA for a device
-
-* "noflush=[interface_number.device_number]" to disable flush requests
-
-* "nohpa=[interface_number.device_number]" to disable Host Protected Area
-
-* "noprobe=[interface_number.device_number]" to skip probing
-
-* "nowerr=[interface_number.device_number]" to ignore the WRERR_STAT bit
-
-* "cdrom=[interface_number.device_number]" to force device as a CD-ROM
-
-* "chs=[interface_number.device_number]" to force device as a disk (using CHS)
-
-
-Some Terminology
-================
-
-IDE
-  Integrated Drive Electronics, meaning that each drive has a built-in
-  controller, which is why an "IDE interface card" is not a "controller card".
-
-ATA
-  AT (the old IBM 286 computer) Attachment Interface, a draft American
-  National Standard for connecting hard drives to PCs.  This is the official
-  name for "IDE".
-
-  The latest standards define some enhancements, known as the ATA-6 spec,
-  which grew out of vendor-specific "Enhanced IDE" (EIDE) implementations.
-
-ATAPI
-  ATA Packet Interface, a new protocol for controlling the drives,
-  similar to SCSI protocols, created at the same time as the ATA2 standard.
-  ATAPI is currently used for controlling CDROM, TAPE and FLOPPY (ZIP or
-  LS120/240) devices, removable R/W cartridges, and for high capacity hard disk
-  drives.
-
-mlord@pobox.com
-
-
-Wed Apr 17 22:52:44 CEST 2002 edited by Marcin Dalecki, the current
-maintainer.
-
-Wed Aug 20 22:31:29 CEST 2003 updated ide boot options to current ide.c
-comments at 2.6.0-test4 time. Maciej Soltysiak <solt@dns.toxicfilms.tv>
diff --git a/Documentation/ide/index.rst b/Documentation/ide/index.rst
deleted file mode 100644
index 813dfe611a31..000000000000
--- a/Documentation/ide/index.rst
+++ /dev/null
@@ -1,21 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-==================================
-Integrated Drive Electronics (IDE)
-==================================
-
-.. toctree::
-    :maxdepth: 1
-
-    ide
-    ide-tape
-    warm-plug-howto
-
-    changelogs
-
-.. only::  subproject and html
-
-   Indices
-   =======
-
-   * :ref:`genindex`
diff --git a/Documentation/ide/warm-plug-howto.rst b/Documentation/ide/warm-plug-howto.rst
deleted file mode 100644
index c245242ef2f1..000000000000
--- a/Documentation/ide/warm-plug-howto.rst
+++ /dev/null
@@ -1,18 +0,0 @@
-===================
-IDE warm-plug HOWTO
-===================
-
-To warm-plug devices on a port 'idex'::
-
-	# echo -n "1" > /sys/class/ide_port/idex/delete_devices
-
-unplug old device(s) and plug new device(s)::
-
-	# echo -n "1" > /sys/class/ide_port/idex/scan
-
-done
-
-NOTE: please make sure that partitions are unmounted and that there are
-no other active references to devices before doing "delete_devices" step,
-also do not attempt "scan" step on devices currently in use -- otherwise
-results may be unpredictable and lead to data loss if you're unlucky
diff --git a/Documentation/index.rst b/Documentation/index.rst
index 1988c19d9daf..062cf88c2216 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -103,7 +103,6 @@ needed).
    block/index
    cdrom/index
    cpu-freq/index
-   ide/index
    fb/index
    fpga/index
    hid/index
@@ -169,7 +168,6 @@ to ReStructured Text format, or are simply too old.
 
    tools/index
    staging/index
-   watch_queue
 
 
 Translations
diff --git a/Documentation/input/devices/atarikbd.rst b/Documentation/input/devices/atarikbd.rst
index 745e7a1ff122..0c4c7804ccb2 100644
--- a/Documentation/input/devices/atarikbd.rst
+++ b/Documentation/input/devices/atarikbd.rst
@@ -288,7 +288,7 @@ between 0 and large positive numbers. Excess motion below 0 is ignored. The
 command sets the maximum positive value that can be attained in the scaled
 coordinate system. Motion beyond that value is also ignored.
 
-SET MOUSE KEYCODE MOSE
+SET MOUSE KEYCODE MODE
 ----------------------
 
 ::
@@ -333,7 +333,7 @@ occur before the internally maintained coordinate is changed by one
 (independently scaled for each axis). Remember that the mouse position
 information is available only by interrogating the ikbd in the ABSOLUTE MOUSE
 POSITIONING mode unless the ikbd has been commanded to report on button press
-or release (see SET MOSE BUTTON ACTION).
+or release (see SET MOUSE BUTTON ACTION).
 
 INTERROGATE MOUSE POSITION
 --------------------------
diff --git a/Documentation/input/devices/ntrig.rst b/Documentation/input/devices/ntrig.rst
index a6b22ce6c61c..1559f53495cb 100644
--- a/Documentation/input/devices/ntrig.rst
+++ b/Documentation/input/devices/ntrig.rst
@@ -32,7 +32,7 @@ The following parameters are used to configure filters to reduce noise:
 |activation_height,	|size threshold to activate immediately		      |
 |activation_width	|						      |
 +-----------------------+-----------------------------------------------------+
-|min_height,		|size threshold bellow which fingers are ignored      |
+|min_height,		|size threshold below which fingers are ignored       |
 |min_width		|both to decide activation and during activity	      |
 +-----------------------+-----------------------------------------------------+
 |deactivate_slack	|the number of "no contact" frames to ignore before   |
diff --git a/Documentation/kbuild/reproducible-builds.rst b/Documentation/kbuild/reproducible-builds.rst
index 3b25655e441b..071f0151a7a4 100644
--- a/Documentation/kbuild/reproducible-builds.rst
+++ b/Documentation/kbuild/reproducible-builds.rst
@@ -99,10 +99,10 @@ unreproducible parts can be treated as sources:
 Structure randomisation
 -----------------------
 
-If you enable ``CONFIG_GCC_PLUGIN_RANDSTRUCT``, you will need to
-pre-generate the random seed in
-``scripts/gcc-plugins/randomize_layout_seed.h`` so the same value
-is used in rebuilds.
+If you enable ``CONFIG_RANDSTRUCT``, you will need to pre-generate
+the random seed in ``scripts/basic/randstruct.seed`` so the same
+value is used by each build. See ``scripts/gen-randstruct-seed.sh``
+for details.
 
 Debug info conflicts
 --------------------
diff --git a/Documentation/kernel-hacking/hacking.rst b/Documentation/kernel-hacking/hacking.rst
index 55bd37a2efb0..ebd9d90882ea 100644
--- a/Documentation/kernel-hacking/hacking.rst
+++ b/Documentation/kernel-hacking/hacking.rst
@@ -112,8 +112,7 @@ time, although different tasklets can run simultaneously.
 .. warning::
 
     The name 'tasklet' is misleading: they have nothing to do with
-    'tasks', and probably more to do with some bad vodka Alexey
-    Kuznetsov had at the time.
+    'tasks'.
 
 You can tell you are in a softirq (or tasklet) using the
 :c:func:`in_softirq()` macro (``include/linux/preempt.h``).
@@ -290,8 +289,8 @@ userspace.
     Unlike :c:func:`put_user()` and :c:func:`get_user()`, they
     return the amount of uncopied data (ie. 0 still means success).
 
-[Yes, this moronic interface makes me cringe. The flamewar comes up
-every year or so. --RR.]
+[Yes, this objectionable interface makes me cringe. The flamewar comes
+up every year or so. --RR.]
 
 The functions may sleep implicitly. This should never be called outside
 user context (it makes no sense), with interrupts disabled, or a
@@ -645,8 +644,9 @@ names in development kernels; this is not done just to keep everyone on
 their toes: it reflects a fundamental change (eg. can no longer be
 called with interrupts on, or does extra checks, or doesn't do checks
 which were caught before). Usually this is accompanied by a fairly
-complete note to the linux-kernel mailing list; search the archive.
-Simply doing a global replace on the file usually makes things **worse**.
+complete note to the appropriate kernel development mailing list; search
+the archives. Simply doing a global replace on the file usually makes
+things **worse**.
 
 Initializing structure members
 ------------------------------
@@ -723,14 +723,14 @@ Putting Your Stuff in the Kernel
 In order to get your stuff into shape for official inclusion, or even to
 make a neat patch, there's administrative work to be done:
 
--  Figure out whose pond you've been pissing in. Look at the top of the
-   source files, inside the ``MAINTAINERS`` file, and last of all in the
-   ``CREDITS`` file. You should coordinate with this person to make sure
-   you're not duplicating effort, or trying something that's already
-   been rejected.
+-  Figure out who are the owners of the code you've been modifying. Look
+   at the top of the source files, inside the ``MAINTAINERS`` file, and
+   last of all in the ``CREDITS`` file. You should coordinate with these
+   people to make sure you're not duplicating effort, or trying something
+   that's already been rejected.
 
-   Make sure you put your name and EMail address at the top of any files
-   you create or mangle significantly. This is the first place people
+   Make sure you put your name and email address at the top of any files
+   you create or modify significantly. This is the first place people
    will look when they find a bug, or when **they** want to make a change.
 
 -  Usually you want a configuration option for your kernel hack. Edit
@@ -748,11 +748,11 @@ make a neat patch, there's administrative work to be done:
    can usually just add a "obj-$(CONFIG_xxx) += xxx.o" line. The syntax
    is documented in ``Documentation/kbuild/makefiles.rst``.
 
--  Put yourself in ``CREDITS`` if you've done something noteworthy,
-   usually beyond a single file (your name should be at the top of the
-   source files anyway). ``MAINTAINERS`` means you want to be consulted
-   when changes are made to a subsystem, and hear about bugs; it implies
-   a more-than-passing commitment to some part of the code.
+-  Put yourself in ``CREDITS`` if you consider what you've done
+   noteworthy, usually beyond a single file (your name should be at the
+   top of the source files anyway). ``MAINTAINERS`` means you want to be
+   consulted when changes are made to a subsystem, and hear about bugs;
+   it implies a more-than-passing commitment to some part of the code.
 
 -  Finally, don't forget to read
    ``Documentation/process/submitting-patches.rst`` and possibly
diff --git a/Documentation/kernel-hacking/locking.rst b/Documentation/kernel-hacking/locking.rst
index 4cbd50edf277..6805ae6e86e6 100644
--- a/Documentation/kernel-hacking/locking.rst
+++ b/Documentation/kernel-hacking/locking.rst
@@ -941,8 +941,7 @@ lock.
 
 A classic problem here is when you provide callbacks or hooks: if you
 call these with the lock held, you risk simple deadlock, or a deadly
-embrace (who knows what the callback will do?). Remember, the other
-programmers are out to get you, so don't do this.
+embrace (who knows what the callback will do?).
 
 Overzealous Prevention Of Deadlocks
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -952,8 +951,6 @@ grabs a read lock, searches a list, fails to find what it wants, drops
 the read lock, grabs a write lock and inserts the object has a race
 condition.
 
-If you don't see why, please stay away from my code.
-
 Racing Timers: A Kernel Pastime
 -------------------------------
 
diff --git a/Documentation/power/energy-model.rst b/Documentation/power/energy-model.rst
index 49549aab41b4..feb257b7f350 100644
--- a/Documentation/power/energy-model.rst
+++ b/Documentation/power/energy-model.rst
@@ -123,6 +123,26 @@ allows a platform to register EM power values which are reflecting total power
 (static + dynamic). These power values might be coming directly from
 experiments and measurements.
 
+Registration of 'artificial' EM
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+There is an option to provide a custom callback for drivers missing detailed
+knowledge about power value for each performance state. The callback
+.get_cost() is optional and provides the 'cost' values used by the EAS.
+This is useful for platforms that only provide information on relative
+efficiency between CPU types, where one could use the information to
+create an abstract power model. But even an abstract power model can
+sometimes be hard to fit in, given the input power value size restrictions.
+The .get_cost() allows to provide the 'cost' values which reflect the
+efficiency of the CPUs. This would allow to provide EAS information which
+has different relation than what would be forced by the EM internal
+formulas calculating 'cost' values. To register an EM for such platform, the
+driver must set the flag 'milliwatts' to 0, provide .get_power() callback
+and provide .get_cost() callback. The EM framework would handle such platform
+properly during registration. A flag EM_PERF_DOMAIN_ARTIFICIAL is set for such
+platform. Special care should be taken by other frameworks which are using EM
+to test and treat this flag properly.
+
 Registration of 'simple' EM
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -181,8 +201,8 @@ EM framework::
 
   -> drivers/cpufreq/foo_cpufreq.c
 
-  01	static int est_power(unsigned long *mW, unsigned long *KHz,
-  02			struct device *dev)
+  01	static int est_power(struct device *dev, unsigned long *mW,
+  02			unsigned long *KHz)
   03	{
   04		long freq, power;
   05
diff --git a/Documentation/process/3.Early-stage.rst b/Documentation/process/3.Early-stage.rst
index 6bfd60d77d1a..894a920041c6 100644
--- a/Documentation/process/3.Early-stage.rst
+++ b/Documentation/process/3.Early-stage.rst
@@ -154,10 +154,11 @@ that the kernel developers have added a script to ease the process:
 This script will return the current maintainer(s) for a given file or
 directory when given the "-f" option.  If passed a patch on the
 command line, it will list the maintainers who should probably receive
-copies of the patch.  There are a number of options regulating how hard
-get_maintainer.pl will search for maintainers; please be careful about
-using the more aggressive options as you may end up including developers
-who have no real interest in the code you are modifying.
+copies of the patch.  This is the preferred way (unlike "-f" option) to get the
+list of people to Cc for your patches.  There are a number of options
+regulating how hard get_maintainer.pl will search for maintainers; please be
+careful about using the more aggressive options as you may end up including
+developers who have no real interest in the code you are modifying.
 
 If all else fails, talking to Andrew Morton can be an effective way to
 track down a maintainer for a specific piece of code.
diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst
index a337e8eabfe1..34415ae1af1b 100644
--- a/Documentation/process/changes.rst
+++ b/Documentation/process/changes.rst
@@ -7,7 +7,7 @@ Intro
 =====
 
 This document is designed to provide a list of the minimum levels of
-software necessary to run the 4.x kernels.
+software necessary to run the current kernel version.
 
 This document is originally based on my "Changes" file for 2.0.x kernels
 and therefore owes credit to the same people as that file (Jared Mauch,
@@ -56,6 +56,7 @@ iptables               1.4.2            iptables -V
 openssl & libcrypto    1.0.0            openssl version
 bc                     1.06.95          bc --version
 Sphinx\ [#f1]_         1.7              sphinx-build --version
+cpio                   any              cpio --version
 ====================== ===============  ========================================
 
 .. [#f1] Sphinx is needed only to build the Kernel documentation
@@ -458,6 +459,11 @@ mcelog
 
 - <http://www.mcelog.org/>
 
+cpio
+----
+
+- <https://www.gnu.org/software/cpio/>
+
 Networking
 **********
 
diff --git a/Documentation/process/maintainer-tip.rst b/Documentation/process/maintainer-tip.rst
index c74f4a81588b..572a3289c9cb 100644
--- a/Documentation/process/maintainer-tip.rst
+++ b/Documentation/process/maintainer-tip.rst
@@ -437,6 +437,20 @@ in a private repository which allows interested people to easily pull the
 series for testing. The usual way to offer this is a git URL in the cover
 letter of the patch series.
 
+Testing
+^^^^^^^
+
+Code should be tested before submitting to the tip maintainers.  Anything
+other than minor changes should be built, booted and tested with
+comprehensive (and heavyweight) kernel debugging options enabled.
+
+These debugging options can be found in kernel/configs/x86_debug.config
+and can be added to an existing kernel config by running:
+
+	make x86_debug.config
+
+Some of these options are x86-specific and can be left out when testing
+on other architectures.
 
 Coding style notes
 ------------------
diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst
index fb496b2ebfd3..a1cb6280fbcf 100644
--- a/Documentation/process/submitting-patches.rst
+++ b/Documentation/process/submitting-patches.rst
@@ -77,7 +77,7 @@ as you intend it to.
 
 The maintainer will thank you if you write your patch description in a
 form which can be easily pulled into Linux's source code management
-system, ``git``, as a "commit log".  See :ref:`explicit_in_reply_to`.
+system, ``git``, as a "commit log".  See :ref:`the_canonical_patch_format`.
 
 Solve only one problem per patch.  If your description starts to get
 long, that's a sign that you probably need to split up your patch.
@@ -227,9 +227,10 @@ Select the recipients for your patch
 You should always copy the appropriate subsystem maintainer(s) on any patch
 to code that they maintain; look through the MAINTAINERS file and the
 source code revision history to see who those maintainers are.  The
-script scripts/get_maintainer.pl can be very useful at this step.  If you
-cannot find a maintainer for the subsystem you are working on, Andrew
-Morton (akpm@linux-foundation.org) serves as a maintainer of last resort.
+script scripts/get_maintainer.pl can be very useful at this step (pass paths to
+your patches as arguments to scripts/get_maintainer.pl).  If you cannot find a
+maintainer for the subsystem you are working on, Andrew Morton
+(akpm@linux-foundation.org) serves as a maintainer of last resort.
 
 You should also normally choose at least one mailing list to receive a copy
 of your patch set.  linux-kernel@vger.kernel.org should be used by default
@@ -318,7 +319,10 @@ understands what is going on.
 Be sure to tell the reviewers what changes you are making and to thank them
 for their time.  Code review is a tiring and time-consuming process, and
 reviewers sometimes get grumpy.  Even in that case, though, respond
-politely and address the problems they have pointed out.
+politely and address the problems they have pointed out.  When sending a next
+version, add a ``patch changelog`` to the cover letter or to individual patches
+explaining difference aganst previous submission (see
+:ref:`the_canonical_patch_format`).
 
 See Documentation/process/email-clients.rst for recommendations on email
 clients and mailing list etiquette.
diff --git a/Documentation/scheduler/sched-stats.rst b/Documentation/scheduler/sched-stats.rst
index dd9b99a025f7..03c062915998 100644
--- a/Documentation/scheduler/sched-stats.rst
+++ b/Documentation/scheduler/sched-stats.rst
@@ -56,9 +56,9 @@ Next two are try_to_wake_up() statistics:
 
 Next three are statistics describing scheduling latency:
 
-     7) sum of all time spent running by tasks on this processor (in jiffies)
+     7) sum of all time spent running by tasks on this processor (in nanoseconds)
      8) sum of all time spent waiting to run by tasks on this processor (in
-        jiffies)
+        nanoseconds)
      9) # of timeslices run on this cpu
 
 
@@ -155,8 +155,8 @@ schedstats also adds a new /proc/<pid>/schedstat file to include some of
 the same information on a per-process level.  There are three fields in
 this file correlating for that process to:
 
-     1) time spent on the cpu
-     2) time spent waiting on a runqueue
+     1) time spent on the cpu (in nanoseconds)
+     2) time spent waiting on a runqueue (in nanoseconds)
      3) # of timeslices run on this cpu
 
 A program could be easily written to make use of these extra fields to
diff --git a/Documentation/security/IMA-templates.rst b/Documentation/security/IMA-templates.rst
index 1a91d92950a7..15b4add314fc 100644
--- a/Documentation/security/IMA-templates.rst
+++ b/Documentation/security/IMA-templates.rst
@@ -66,12 +66,13 @@ descriptors by adding their identifier to the format string
    calculated with the SHA1 or MD5 hash algorithm;
  - 'n': the name of the event (i.e. the file name), with size up to 255 bytes;
  - 'd-ng': the digest of the event, calculated with an arbitrary hash
-   algorithm (field format: [<hash algo>:]digest, where the digest
-   prefix is shown only if the hash algorithm is not SHA1 or MD5);
+   algorithm (field format: <hash algo>:digest);
+ - 'd-ngv2': same as d-ng, but prefixed with the "ima" or "verity" digest type
+   (field format: <digest type>:<hash algo>:digest);
  - 'd-modsig': the digest of the event without the appended modsig;
  - 'n-ng': the name of the event, without size limitations;
- - 'sig': the file signature, or the EVM portable signature if the file
-   signature is not found;
+ - 'sig': the file signature, based on either the file's/fsverity's digest[1],
+   or the EVM portable signature, if 'security.ima' contains a file hash.
  - 'modsig' the appended file signature;
  - 'buf': the buffer data that was used to generate the hash without size limitations;
  - 'evmsig': the EVM portable signature;
@@ -88,7 +89,9 @@ Below, there is the list of defined template descriptors:
 
  - "ima": its format is ``d|n``;
  - "ima-ng" (default): its format is ``d-ng|n-ng``;
+ - "ima-ngv2": its format is ``d-ngv2|n-ng``;
  - "ima-sig": its format is ``d-ng|n-ng|sig``;
+ - "ima-sigv2": its format is ``d-ngv2|n-ng|sig``;
  - "ima-buf": its format is ``d-ng|n-ng|buf``;
  - "ima-modsig": its format is ``d-ng|n-ng|sig|d-modsig|modsig``;
  - "evm-sig": its format is ``d-ng|n-ng|evmsig|xattrnames|xattrlengths|xattrvalues|iuid|igid|imode``;
diff --git a/Documentation/security/index.rst b/Documentation/security/index.rst
index 16335de04e8c..6ed8d2fa6f9e 100644
--- a/Documentation/security/index.rst
+++ b/Documentation/security/index.rst
@@ -17,3 +17,4 @@ Security Documentation
    tpm/index
    digsig
    landlock
+   secrets/index
diff --git a/Documentation/security/keys/trusted-encrypted.rst b/Documentation/security/keys/trusted-encrypted.rst
index f614dad7de12..0bfb4c339748 100644
--- a/Documentation/security/keys/trusted-encrypted.rst
+++ b/Documentation/security/keys/trusted-encrypted.rst
@@ -35,6 +35,13 @@ safe.
          Rooted to Hardware Unique Key (HUK) which is generally burnt in on-chip
          fuses and is accessible to TEE only.
 
+     (3) CAAM (Cryptographic Acceleration and Assurance Module: IP on NXP SoCs)
+
+         When High Assurance Boot (HAB) is enabled and the CAAM is in secure
+         mode, trust is rooted to the OTPMK, a never-disclosed 256-bit key
+         randomly generated and fused into each SoC at manufacturing time.
+         Otherwise, a common fixed test key is used instead.
+
   *  Execution isolation
 
      (1) TPM
@@ -46,6 +53,10 @@ safe.
          Customizable set of operations running in isolated execution
          environment verified via Secure/Trusted boot process.
 
+     (3) CAAM
+
+         Fixed set of operations running in isolated execution environment.
+
   * Optional binding to platform integrity state
 
      (1) TPM
@@ -63,6 +74,11 @@ safe.
          Relies on Secure/Trusted boot process for platform integrity. It can
          be extended with TEE based measured boot process.
 
+     (3) CAAM
+
+         Relies on the High Assurance Boot (HAB) mechanism of NXP SoCs
+         for platform integrity.
+
   *  Interfaces and APIs
 
      (1) TPM
@@ -74,10 +90,13 @@ safe.
          TEEs have well-documented, standardized client interface and APIs. For
          more details refer to ``Documentation/staging/tee.rst``.
 
+     (3) CAAM
+
+         Interface is specific to silicon vendor.
 
   *  Threat model
 
-     The strength and appropriateness of a particular TPM or TEE for a given
+     The strength and appropriateness of a particular trust source for a given
      purpose must be assessed when using them to protect security-relevant data.
 
 
@@ -87,22 +106,32 @@ Key Generation
 Trusted Keys
 ------------
 
-New keys are created from random numbers generated in the trust source. They
-are encrypted/decrypted using a child key in the storage key hierarchy.
-Encryption and decryption of the child key must be protected by a strong
-access control policy within the trust source.
+New keys are created from random numbers. They are encrypted/decrypted using
+a child key in the storage key hierarchy. Encryption and decryption of the
+child key must be protected by a strong access control policy within the
+trust source. The random number generator in use differs according to the
+selected trust source:
 
-  *  TPM (hardware device) based RNG
+  *  TPM: hardware device based RNG
 
-     Strength of random numbers may vary from one device manufacturer to
-     another.
+     Keys are generated within the TPM. Strength of random numbers may vary
+     from one device manufacturer to another.
 
-  *  TEE (OP-TEE based on Arm TrustZone) based RNG
+  *  TEE: OP-TEE based on Arm TrustZone based RNG
 
      RNG is customizable as per platform needs. It can either be direct output
      from platform specific hardware RNG or a software based Fortuna CSPRNG
      which can be seeded via multiple entropy sources.
 
+  *  CAAM: Kernel RNG
+
+     The normal kernel random number generator is used. To seed it from the
+     CAAM HWRNG, enable CRYPTO_DEV_FSL_CAAM_RNG_API and ensure the device
+     is probed.
+
+Users may override this by specifying ``trusted.rng=kernel`` on the kernel
+command-line to override the used RNG with the kernel's random number pool.
+
 Encrypted Keys
 --------------
 
@@ -189,6 +218,19 @@ Usage::
 specific to TEE device implementation.  The key length for new keys is always
 in bytes. Trusted Keys can be 32 - 128 bytes (256 - 1024 bits).
 
+Trusted Keys usage: CAAM
+------------------------
+
+Usage::
+
+    keyctl add trusted name "new keylen" ring
+    keyctl add trusted name "load hex_blob" ring
+    keyctl print keyid
+
+"keyctl print" returns an ASCII hex copy of the sealed key, which is in a
+CAAM-specific format.  The key length for new keys is always in bytes.
+Trusted Keys can be 32 - 128 bytes (256 - 1024 bits).
+
 Encrypted Keys usage
 --------------------
 
diff --git a/Documentation/security/landlock.rst b/Documentation/security/landlock.rst
index 3df68cb1d10f..5c77730b4479 100644
--- a/Documentation/security/landlock.rst
+++ b/Documentation/security/landlock.rst
@@ -7,7 +7,7 @@ Landlock LSM: kernel documentation
 ==================================
 
 :Author: Mickaël Salaün
-:Date: March 2021
+:Date: May 2022
 
 Landlock's goal is to create scoped access-control (i.e. sandboxing).  To
 harden a whole system, this feature should be available to any process,
@@ -42,6 +42,21 @@ Guiding principles for safe access controls
 * Computation related to Landlock operations (e.g. enforcing a ruleset) shall
   only impact the processes requesting them.
 
+Design choices
+==============
+
+Filesystem access rights
+------------------------
+
+All access rights are tied to an inode and what can be accessed through it.
+Reading the content of a directory doesn't imply to be allowed to read the
+content of a listed inode.  Indeed, a file name is local to its parent
+directory, and an inode can be referenced by multiple file names thanks to
+(hard) links.  Being able to unlink a file only has a direct impact on the
+directory, not the unlinked inode.  This is the reason why
+`LANDLOCK_ACCESS_FS_REMOVE_FILE` or `LANDLOCK_ACCESS_FS_REFER` are not allowed
+to be tied to files but only to directories.
+
 Tests
 =====
 
diff --git a/Documentation/security/secrets/coco.rst b/Documentation/security/secrets/coco.rst
new file mode 100644
index 000000000000..262e7abb1b24
--- /dev/null
+++ b/Documentation/security/secrets/coco.rst
@@ -0,0 +1,103 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============================
+Confidential Computing secrets
+==============================
+
+This document describes how Confidential Computing secret injection is handled
+from the firmware to the operating system, in the EFI driver and the efi_secret
+kernel module.
+
+
+Introduction
+============
+
+Confidential Computing (coco) hardware such as AMD SEV (Secure Encrypted
+Virtualization) allows guest owners to inject secrets into the VMs
+memory without the host/hypervisor being able to read them.  In SEV,
+secret injection is performed early in the VM launch process, before the
+guest starts running.
+
+The efi_secret kernel module allows userspace applications to access these
+secrets via securityfs.
+
+
+Secret data flow
+================
+
+The guest firmware may reserve a designated memory area for secret injection,
+and publish its location (base GPA and length) in the EFI configuration table
+under a ``LINUX_EFI_COCO_SECRET_AREA_GUID`` entry
+(``adf956ad-e98c-484c-ae11-b51c7d336447``).  This memory area should be marked
+by the firmware as ``EFI_RESERVED_TYPE``, and therefore the kernel should not
+be use it for its own purposes.
+
+During the VM's launch, the virtual machine manager may inject a secret to that
+area.  In AMD SEV and SEV-ES this is performed using the
+``KVM_SEV_LAUNCH_SECRET`` command (see [sev]_).  The strucutre of the injected
+Guest Owner secret data should be a GUIDed table of secret values; the binary
+format is described in ``drivers/virt/coco/efi_secret/efi_secret.c`` under
+"Structure of the EFI secret area".
+
+On kernel start, the kernel's EFI driver saves the location of the secret area
+(taken from the EFI configuration table) in the ``efi.coco_secret`` field.
+Later it checks if the secret area is populated: it maps the area and checks
+whether its content begins with ``EFI_SECRET_TABLE_HEADER_GUID``
+(``1e74f542-71dd-4d66-963e-ef4287ff173b``).  If the secret area is populated,
+the EFI driver will autoload the efi_secret kernel module, which exposes the
+secrets to userspace applications via securityfs.  The details of the
+efi_secret filesystem interface are in [secrets-coco-abi]_.
+
+
+Application usage example
+=========================
+
+Consider a guest performing computations on encrypted files.  The Guest Owner
+provides the decryption key (= secret) using the secret injection mechanism.
+The guest application reads the secret from the efi_secret filesystem and
+proceeds to decrypt the files into memory and then performs the needed
+computations on the content.
+
+In this example, the host can't read the files from the disk image
+because they are encrypted.  Host can't read the decryption key because
+it is passed using the secret injection mechanism (= secure channel).
+Host can't read the decrypted content from memory because it's a
+confidential (memory-encrypted) guest.
+
+Here is a simple example for usage of the efi_secret module in a guest
+to which an EFI secret area with 4 secrets was injected during launch::
+
+	# ls -la /sys/kernel/security/secrets/coco
+	total 0
+	drwxr-xr-x 2 root root 0 Jun 28 11:54 .
+	drwxr-xr-x 3 root root 0 Jun 28 11:54 ..
+	-r--r----- 1 root root 0 Jun 28 11:54 736870e5-84f0-4973-92ec-06879ce3da0b
+	-r--r----- 1 root root 0 Jun 28 11:54 83c83f7f-1356-4975-8b7e-d3a0b54312c6
+	-r--r----- 1 root root 0 Jun 28 11:54 9553f55d-3da2-43ee-ab5d-ff17f78864d2
+	-r--r----- 1 root root 0 Jun 28 11:54 e6f5a162-d67f-4750-a67c-5d065f2a9910
+
+	# hd /sys/kernel/security/secrets/coco/e6f5a162-d67f-4750-a67c-5d065f2a9910
+	00000000  74 68 65 73 65 2d 61 72  65 2d 74 68 65 2d 6b 61  |these-are-the-ka|
+	00000010  74 61 2d 73 65 63 72 65  74 73 00 01 02 03 04 05  |ta-secrets......|
+	00000020  06 07                                             |..|
+	00000022
+
+	# rm /sys/kernel/security/secrets/coco/e6f5a162-d67f-4750-a67c-5d065f2a9910
+
+	# ls -la /sys/kernel/security/secrets/coco
+	total 0
+	drwxr-xr-x 2 root root 0 Jun 28 11:55 .
+	drwxr-xr-x 3 root root 0 Jun 28 11:54 ..
+	-r--r----- 1 root root 0 Jun 28 11:54 736870e5-84f0-4973-92ec-06879ce3da0b
+	-r--r----- 1 root root 0 Jun 28 11:54 83c83f7f-1356-4975-8b7e-d3a0b54312c6
+	-r--r----- 1 root root 0 Jun 28 11:54 9553f55d-3da2-43ee-ab5d-ff17f78864d2
+
+
+References
+==========
+
+See [sev-api-spec]_ for more info regarding SEV ``LAUNCH_SECRET`` operation.
+
+.. [sev] Documentation/virt/kvm/amd-memory-encryption.rst
+.. [secrets-coco-abi] Documentation/ABI/testing/securityfs-secrets-coco
+.. [sev-api-spec] https://www.amd.com/system/files/TechDocs/55766_SEV-KM_API_Specification.pdf
diff --git a/Documentation/security/secrets/index.rst b/Documentation/security/secrets/index.rst
new file mode 100644
index 000000000000..ced34e9c43bd
--- /dev/null
+++ b/Documentation/security/secrets/index.rst
@@ -0,0 +1,9 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+Secrets documentation
+=====================
+
+.. toctree::
+
+   coco
diff --git a/Documentation/sphinx/kerneldoc-preamble.sty b/Documentation/sphinx/kerneldoc-preamble.sty
index 9d0204dc38be..2a29cbe51396 100644
--- a/Documentation/sphinx/kerneldoc-preamble.sty
+++ b/Documentation/sphinx/kerneldoc-preamble.sty
@@ -20,13 +20,13 @@
 %  - Indent of 2 chars is preserved for ease of comparison.
 % Summary of changes from default params:
 %   Width of page number (\@pnumwidth): 1.55em -> 2.7em
-%   Width of chapter number:            1.5em  -> 1.8em
-%   Indent of section number:           1.5em  -> 1.8em
+%   Width of chapter number:            1.5em  -> 2.4em
+%   Indent of section number:           1.5em  -> 2.4em
 %   Width of section number:            2.6em  -> 3.2em
-%   Indent of sebsection number:        4.1em  -> 5em
+%   Indent of subsection number:        4.1em  -> 5.6em
 %   Width of subsection number:         3.5em  -> 4.3em
 %
-% These params can have 4 digit page counts, 2 digit chapter counts,
+% These params can have 4 digit page counts, 3 digit chapter counts,
 % section counts of 4 digits + 1 period (e.g., 18.10), and subsection counts
 % of 5 digits + 2 periods (e.g., 18.7.13).
 \makeatletter
@@ -37,7 +37,7 @@
   \ifnum \c@tocdepth >\m@ne
     \addpenalty{-\@highpenalty}%
     \vskip 1.0em \@plus\p@
-    \setlength\@tempdima{1.8em}%
+    \setlength\@tempdima{2.4em}%
     \begingroup
       \parindent \z@ \rightskip \@pnumwidth
       \parfillskip -\@pnumwidth
@@ -51,8 +51,8 @@
     \endgroup
   \fi}
 %% Redefine \l@section and \l@subsection
-\renewcommand*\l@section{\@dottedtocline{1}{1.8em}{3.2em}}
-\renewcommand*\l@subsection{\@dottedtocline{2}{5em}{4.3em}}
+\renewcommand*\l@section{\@dottedtocline{1}{2.4em}{3.2em}}
+\renewcommand*\l@subsection{\@dottedtocline{2}{5.6em}{4.3em}}
 \makeatother
 %% Sphinx < 1.8 doesn't have \sphinxtableofcontentshook
 \providecommand{\sphinxtableofcontentshook}{}
diff --git a/Documentation/tools/rtla/common_appendix.rst b/Documentation/tools/rtla/common_appendix.rst
index b494084acccd..b5cf2dc223df 100644
--- a/Documentation/tools/rtla/common_appendix.rst
+++ b/Documentation/tools/rtla/common_appendix.rst
@@ -1,6 +1,7 @@
 REPORTING BUGS
 ==============
-Report bugs to <lkml@vger.kernel.org>
+Report bugs to <linux-kernel@vger.kernel.org>
+and <linux-trace-devel@vger.kernel.org>
 
 LICENSE
 =======
diff --git a/Documentation/translations/ja_JP/SubmittingPatches b/Documentation/translations/ja_JP/SubmittingPatches
index 0d308edef781..66ce0d8b0526 100644
--- a/Documentation/translations/ja_JP/SubmittingPatches
+++ b/Documentation/translations/ja_JP/SubmittingPatches
@@ -81,9 +81,7 @@ Linux カーネルに対する全ての変更は diff(1) コマンドによる�
 dontdiff ファイルには Linux カーネルのビルドプロセスの過程で生成された
 ファイルの一覧がのっています。そして、それらはパッチを生成する diff(1)
 コマンドで無視されるべきです。dontdiff ファイルは 2.6.12 以後のバージョ
-ンの Linux カーネルソースツリーに含まれています。それより前のバージョン
-の Linux カーネルソースツリーに対する dontdiff ファイルは、
-<http://www.xenotime.net/linux/doc/dontdiff>から取得することができます。
+ンの Linux カーネルソースツリーに含まれています。
 
 投稿するパッチの中に関係のない余分なファイルが含まれていないことを確
 認してください。diff(1) コマンドで生成したパッチがあなたの意図したとお
@@ -125,6 +123,17 @@ http://savannah.nongnu.org/projects/quilt
 登録済みのバグエントリを修正するパッチであれば、そのバグエントリを示すバグ ID
 や URL を明記してください。
 
+特定のコミットを参照したい場合は、その SHA-1 ID だけでなく、一行サマリ
+も含めてください。それにより、それが何に関するコミットなのかがレビューする
+人にわかりやすくなります。
+例 (英文のママ):
+
+       Commit e21d2170f36602ae2708 ("video: remove unnecessary
+       platform_set_drvdata()") removed the unnecessary
+       platform_set_drvdata(), but left the variable "dev" unused,
+       delete it.
+
+
 3) パッチの分割
 
 意味のあるひとまとまりごとに変更を個々のパッチファイルに分けてください。
@@ -162,7 +171,8 @@ http://savannah.nongnu.org/projects/quilt
 
 MAINTAINERS ファイルとソースコードに目を通してください。そして、その変
 更がメンテナのいる特定のサブシステムに加えられるものであることが分か
-れば、その人に電子メールを送ってください。
+れば、その人に電子メールを送ってください。その際
+./scripts/get_maintainers.pl のスクリプトが有用です。
 
 もし、メンテナが載っていなかったり、メンテナからの応答がないなら、
 LKML ( linux-kernel@vger.kernel.org )へパッチを送ってください。ほとんど
@@ -400,7 +410,7 @@ Acked-by: が必ずしもパッチ全体の承認を示しているわけでは�
 このタグはパッチに関心があると思われる人達がそのパッチの議論に含まれていたこと
 を明文化します。
 
-14) Reported-by と Tested-by: と Reviewed-by: の利用
+14) Reported-by:, Tested-by:, Reviewed-by: および Suggested-by: の利用
 
 他の誰かによって報告された問題を修正するパッチであれば、問題報告者という寄与を
 クレジットするために、Reported-by: タグを追加することを検討してください。
@@ -449,6 +459,13 @@ Reviewd-by タグはそのパッチがカーネルに対して適切な修正で
 レビューを実施したレビューアによって提供される時、Reviewed-by: タグがあなたの
 パッチをカーネルにマージする可能性を高めるでしょう。
 
+Suggested-by: タグは、パッチのアイデアがその人からの提案に基づくものである
+ことを示し、アイデアの提供をクレジットするものです。提案者の明示的な許可が
+ない場合、特にそのアイデアが公開のフォーラムで示されていない場合には、この
+タグをつけないように注意してください。とはいえ、アイデアの提供者をこつこつ
+クレジットしていけば、望むらくはその人たちが将来別の機会に再度力を貸す気に
+なってくれるかもしれません。
+
 15) 標準的なパッチのフォーマット
 
 標準的なパッチのサブジェクトは以下のとおりです。
@@ -681,10 +698,11 @@ Jeff Garzik, "Linux kernel patch submission format".
   <https://web.archive.org/web/20180829112450/http://linux.yyz.us/patch-format.html>
 
 Greg Kroah-Hartman, "How to piss off a kernel subsystem maintainer".
-  <http://www.kroah.com/log/2005/03/31/>
-  <http://www.kroah.com/log/2005/07/08/>
-  <http://www.kroah.com/log/2005/10/19/>
-  <http://www.kroah.com/log/2006/01/11/>
+  <http://www.kroah.com/log/linux/maintainer.html>
+  <http://www.kroah.com/log/linux/maintainer-02.html>
+  <http://www.kroah.com/log/linux/maintainer-03.html>
+  <http://www.kroah.com/log/linux/maintainer-04.html>
+  <http://www.kroah.com/log/linux/maintainer-05.html>
 
 NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people!
   <https://lore.kernel.org/r/20050711.125305.08322243.davem@davemloft.net>
diff --git a/Documentation/translations/ja_JP/howto.rst b/Documentation/translations/ja_JP/howto.rst
index d667f9d8a02a..38fed6fe62fe 100644
--- a/Documentation/translations/ja_JP/howto.rst
+++ b/Documentation/translations/ja_JP/howto.rst
@@ -262,21 +262,21 @@ Linux カーネルの開発プロセスは現在幾つかの異なるメイン�
 チ」と多数のサブシステム毎のカーネルブランチから構成されます。これらの
 ブランチとは -
 
-  - メインの 4.x カーネルツリー
-  - 4.x.y -stable カーネルツリー
-  - サブシステム毎のカーネルツリーとパッチ
-  - 統合テストのための 4.x -next カーネルツリー
+  - Linus のメインラインツリー
+  - メジャー番号をまたぐ数本の安定版ツリー
+  - サブシステム毎のカーネルツリー
+  - 統合テストのための linux-next カーネルツリー
 
-4.x カーネルツリー
+メインラインツリー
 ~~~~~~~~~~~~~~~~~~
 
-4.x カーネルは Linus Torvalds によってメンテナンスされ、
-https://kernel.org の pub/linux/kernel/v4.x/ ディレクトリに存在します。
+メインラインツリーは Linus Torvalds によってメンテナンスされ、
+https://kernel.org のリポジトリに存在します。
 この開発プロセスは以下のとおり -
 
   - 新しいカーネルがリリースされた直後に、2週間の特別期間が設けられ、
     この期間中に、メンテナ達は Linus に大きな差分を送ることができます。
-    このような差分は通常 -next カーネルに数週間含まれてきたパッチです。
+    このような差分は通常 linux-next カーネルに数週間含まれてきたパッチです。
     大きな変更は git(カーネルのソース管理ツール、詳細は
     http://git-scm.com/ 参照) を使って送るのが好ましいやり方ですが、パッ
     チファイルの形式のまま送るのでも十分です。
@@ -303,20 +303,18 @@ Andrew Morton が Linux-kernel メーリングリストにカーネルリリー�
         前もって決められた計画によってリリースされるものではないから
         です。」*
 
-4.x.y -stable カーネルツリー
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+メジャー番号をまたぐ数本の安定版ツリー
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 バージョン番号が3つの数字に分かれているカーネルは -stable カーネルです。
-これには、4.x カーネルで見つかったセキュリティ問題や重大な後戻りに対す
-る比較的小さい重要な修正が含まれます。
+これには最初の2つのバージョン番号の数字に対応した、
+メインラインリリースで見つかったセキュリティ問題や
+重大な後戻りに対する比較的小さい重要な修正が含まれます。
 
 これは、開発/実験的バージョンのテストに協力することに興味が無く、最新
 の安定したカーネルを使いたいユーザに推奨するブランチです。
 
-もし、4.x.y カーネルが存在しない場合には、番号が一番大きい 4.x が最新
-の安定版カーネルです。
-
-4.x.y は "stable" チーム <stable@vger.kernel.org> でメンテされており、
+安定版ツリーは"stable" チーム <stable@vger.kernel.org> でメンテされており、
 必要に応じてリリースされます。通常のリリース期間は 2週間毎ですが、差
 し迫った問題がなければもう少し長くなることもあります。セキュリティ関
 連の問題の場合はこれに対してだいたいの場合、すぐにリリースがされます。
@@ -326,7 +324,7 @@ Documentation/process/stable-kernel-rules.rst ファイルにはどのような�
 類の変更が -stable ツリーに受け入れ可能か、またリリースプロセスがどう
 動くかが記述されています。
 
-サブシステム毎のカーネルツリーとパッチ
+サブシステム毎のカーネルツリー
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 それぞれのカーネルサブシステムのメンテナ達は --- そして多くのカーネル
@@ -351,19 +349,19 @@ quilt シリーズとして公開されているパッチキューも使われ�
 けることができます。大部分のこれらの patchwork のサイトは
 https://patchwork.kernel.org/ でリストされています。
 
-統合テストのための 4.x -next カーネルツリー
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+統合テストのための linux-next カーネルツリー
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-サブシステムツリーの更新内容がメインラインの 4.x ツリーにマージされる
+サブシステムツリーの更新内容がメインラインツリーにマージされる
 前に、それらは統合テストされる必要があります。この目的のため、実質的に
 全サブシステムツリーからほぼ毎日プルされてできる特別なテスト用のリポジ
 トリが存在します-
 
        https://git.kernel.org/?p=linux/kernel/git/next/linux-next.git
 
-このやり方によって、-next カーネルは次のマージ機会でどんなものがメイン
-ラインカーネルにマージされるか、おおまかなの展望を提供します。-next カー
-ネルの実行テストを行う冒険好きなテスターは大いに歓迎されます。
+このやり方によって、linux-next は次のマージ機会でどんなものがメイン
+ラインにマージされるか、おおまかな展望を提供します。
+linux-next の実行テストを行う冒険好きなテスターは大いに歓迎されます。
 
 バグレポート
 -------------
diff --git a/Documentation/translations/ja_JP/index.rst b/Documentation/translations/ja_JP/index.rst
index 20738c931d02..43b9fb7246d3 100644
--- a/Documentation/translations/ja_JP/index.rst
+++ b/Documentation/translations/ja_JP/index.rst
@@ -5,7 +5,7 @@
 	\kerneldocCJKon
 	\kerneldocBeginJP{
 
-Japanese translations
+日本語訳
 =====================
 
 .. toctree::
diff --git a/Documentation/translations/zh_CN/admin-guide/mm/damon/reclaim.rst b/Documentation/translations/zh_CN/admin-guide/mm/damon/reclaim.rst
index 9e541578f38d..1500bdbf338a 100644
--- a/Documentation/translations/zh_CN/admin-guide/mm/damon/reclaim.rst
+++ b/Documentation/translations/zh_CN/admin-guide/mm/damon/reclaim.rst
@@ -53,8 +53,8 @@ DAMON_RECLAIM找到在特定时间内没有被访问的内存区域并分页。�
 
 下面是每个参数的描述。
 
-enable
-------
+enabled
+-------
 
 启用或禁用DAMON_RECLAIM。
 
diff --git a/Documentation/translations/zh_CN/admin-guide/mm/damon/usage.rst b/Documentation/translations/zh_CN/admin-guide/mm/damon/usage.rst
index 5d7533347216..eee0e8c5c368 100644
--- a/Documentation/translations/zh_CN/admin-guide/mm/damon/usage.rst
+++ b/Documentation/translations/zh_CN/admin-guide/mm/damon/usage.rst
@@ -13,7 +13,7 @@
 详细用法
 ========
 
-DAMON 为不同的用户提供了下面三种接口。
+DAMON 为不同的用户提供了下面这些接口。
 
 - *DAMON用户空间工具。*
   `这 <https://github.com/awslabs/damo>`_ 为有这特权的人， 如系统管理员，希望有一个刚好
@@ -21,19 +21,290 @@ DAMON 为不同的用户提供了下面三种接口。
   使用它，用户可以以人性化的方式使用DAMON的主要功能。不过，它可能不会为特殊情况进行高度调整。
   它同时支持虚拟和物理地址空间的监测。更多细节，请参考它的 `使用文档
   <https://github.com/awslabs/damo/blob/next/USAGE.md>`_。
-- *debugfs接口。*
-  :ref:`这 <debugfs_interface>` 是为那些希望更高级的使用DAMON的特权用户空间程序员准备的。
-  使用它，用户可以通过读取和写入特殊的debugfs文件来使用DAMON的主要功能。因此，你可以编写和使
-  用你个性化的DAMON debugfs包装程序，代替你读/写debugfs文件。  `DAMON用户空间工具
+- *sysfs接口。*
+  :ref:`这 <sysfs_interface>` 是为那些希望更高级的使用DAMON的特权用户空间程序员准备的。
+  使用它，用户可以通过读取和写入特殊的sysfs文件来使用DAMON的主要功能。因此，你可以编写和使
+  用你个性化的DAMON sysfs包装程序，代替你读/写sysfs文件。  `DAMON用户空间工具
   <https://github.com/awslabs/damo>`_ 就是这种程序的一个例子  它同时支持虚拟和物理地址
   空间的监测。注意，这个界面只提供简单的监测结果 :ref:`统计 <damos_stats>`。对于详细的监测
   结果，DAMON提供了一个:ref:`跟踪点 <tracepoint>`。
-
+- *debugfs interface.*
+  :ref:`这 <debugfs_interface>` 几乎与:ref:`sysfs interface <sysfs_interface>` 接
+  口相同。这将在下一个LTS内核发布后被移除，所以用户应该转移到
+  :ref:`sysfs interface <sysfs_interface>`。
 - *内核空间编程接口。*
-  :doc:`This </vm/damon/api>` 这是为内核空间程序员准备的。使用它，用户可以通过为你编写内
+  :doc:`这 </vm/damon/api>` 这是为内核空间程序员准备的。使用它，用户可以通过为你编写内
   核空间的DAMON应用程序，最灵活有效地利用DAMON的每一个功能。你甚至可以为各种地址空间扩展DAMON。
   详细情况请参考接口 :doc:`文件 </vm/damon/api>`。
 
+sysfs接口
+=========
+DAMON的sysfs接口是在定义 ``CONFIG_DAMON_SYSFS`` 时建立的。它在其sysfs目录下创建多
+个目录和文件， ``<sysfs>/kernel/mm/damon/`` 。你可以通过对该目录下的文件进行写入和
+读取来控制DAMON。
+
+对于一个简短的例子，用户可以监测一个给定工作负载的虚拟地址空间，如下所示::
+
+    # cd /sys/kernel/mm/damon/admin/
+    # echo 1 > kdamonds/nr && echo 1 > kdamonds/0/contexts/nr
+    # echo vaddr > kdamonds/0/contexts/0/operations
+    # echo 1 > kdamonds/0/contexts/0/targets/nr
+    # echo $(pidof <workload>) > kdamonds/0/contexts/0/targets/0/pid
+    # echo on > kdamonds/0/state
+
+文件层次结构
+------------
+
+DAMON sysfs接口的文件层次结构如下图所示。在下图中，父子关系用缩进表示，每个目录有
+``/`` 后缀，每个目录中的文件用逗号（","）分开。 ::
+
+    /sys/kernel/mm/damon/admin
+    │ kdamonds/nr_kdamonds
+    │ │ 0/state,pid
+    │ │ │ contexts/nr_contexts
+    │ │ │ │ 0/operations
+    │ │ │ │ │ monitoring_attrs/
+    │ │ │ │ │ │ intervals/sample_us,aggr_us,update_us
+    │ │ │ │ │ │ nr_regions/min,max
+    │ │ │ │ │ targets/nr_targets
+    │ │ │ │ │ │ 0/pid_target
+    │ │ │ │ │ │ │ regions/nr_regions
+    │ │ │ │ │ │ │ │ 0/start,end
+    │ │ │ │ │ │ │ │ ...
+    │ │ │ │ │ │ ...
+    │ │ │ │ │ schemes/nr_schemes
+    │ │ │ │ │ │ 0/action
+    │ │ │ │ │ │ │ access_pattern/
+    │ │ │ │ │ │ │ │ sz/min,max
+    │ │ │ │ │ │ │ │ nr_accesses/min,max
+    │ │ │ │ │ │ │ │ age/min,max
+    │ │ │ │ │ │ │ quotas/ms,bytes,reset_interval_ms
+    │ │ │ │ │ │ │ │ weights/sz_permil,nr_accesses_permil,age_permil
+    │ │ │ │ │ │ │ watermarks/metric,interval_us,high,mid,low
+    │ │ │ │ │ │ │ stats/nr_tried,sz_tried,nr_applied,sz_applied,qt_exceeds
+    │ │ │ │ │ │ ...
+    │ │ │ │ ...
+    │ │ ...
+
+根
+--
+
+DAMON sysfs接口的根是 ``<sysfs>/kernel/mm/damon/`` ，它有一个名为 ``admin`` 的
+目录。该目录包含特权用户空间程序控制DAMON的文件。拥有根权限的用户空间工具或deamons可以
+使用这个目录。
+
+kdamonds/
+---------
+
+与监测相关的信息包括请求规格和结果被称为DAMON上下文。DAMON用一个叫做kdamond的内核线程
+执行每个上下文，多个kdamonds可以并行运行。
+
+在 ``admin`` 目录下，有一个目录，即``kdamonds``，它有控制kdamonds的文件存在。在开始
+时，这个目录只有一个文件，``nr_kdamonds``。向该文件写入一个数字（``N``），就会创建名为
+``0`` 到 ``N-1`` 的子目录数量。每个目录代表每个kdamond。
+
+kdamonds/<N>/
+-------------
+
+在每个kdamond目录中，存在两个文件（``state`` 和 ``pid`` ）和一个目录( ``contexts`` )。
+
+读取 ``state`` 时，如果kdamond当前正在运行，则返回 ``on`` ，如果没有运行则返回 ``off`` 。
+写入 ``on`` 或 ``off`` 使kdamond处于状态。向 ``state`` 文件写 ``update_schemes_stats`` ，
+更新kdamond的每个基于DAMON的操作方案的统计文件的内容。关于统计信息的细节，请参考
+:ref:`stats section <sysfs_schemes_stats>`.
+
+如果状态为 ``on``，读取 ``pid`` 显示kdamond线程的pid。
+
+``contexts`` 目录包含控制这个kdamond要执行的监测上下文的文件。
+
+kdamonds/<N>/contexts/
+----------------------
+
+在开始时，这个目录只有一个文件，即 ``nr_contexts`` 。向该文件写入一个数字( ``N`` )，就会创
+建名为``0`` 到 ``N-1`` 的子目录数量。每个目录代表每个监测背景。目前，每个kdamond只支持
+一个上下文，所以只有 ``0`` 或 ``1`` 可以被写入文件。
+
+contexts/<N>/
+-------------
+
+在每个上下文目录中，存在一个文件(``operations``)和三个目录(``monitoring_attrs``,
+``targets``, 和 ``schemes``)。
+
+DAMON支持多种类型的监测操作，包括对虚拟地址空间和物理地址空间的监测。你可以通过向文件
+中写入以下关键词之一，并从文件中读取，来设置和获取DAMON将为上下文使用何种类型的监测操作。
+
+ - vaddr: 监测特定进程的虚拟地址空间
+ - paddr: 监视系统的物理地址空间
+
+contexts/<N>/monitoring_attrs/
+------------------------------
+
+用于指定监测属性的文件，包括所需的监测质量和效率，都在 ``monitoring_attrs`` 目录中。
+具体来说，这个目录下有两个目录，即 ``intervals`` 和 ``nr_regions`` 。
+
+在 ``intervals`` 目录下，存在DAMON的采样间隔(``sample_us``)、聚集间隔(``aggr_us``)
+和更新间隔(``update_us``)三个文件。你可以通过写入和读出这些文件来设置和获取微秒级的值。
+
+在 ``nr_regions`` 目录下，有两个文件分别用于DAMON监测区域的下限和上限（``min`` 和 ``max`` ），
+这两个文件控制着监测的开销。你可以通过向这些文件的写入和读出来设置和获取这些值。
+
+关于间隔和监测区域范围的更多细节，请参考设计文件 (:doc:`/vm/damon/design`)。
+
+contexts/<N>/targets/
+---------------------
+
+在开始时，这个目录只有一个文件 ``nr_targets`` 。向该文件写入一个数字(``N``)，就可以创建
+名为 ``0`` 到 ``N-1`` 的子目录的数量。每个目录代表每个监测目标。
+
+targets/<N>/
+------------
+
+在每个目标目录中，存在一个文件(``pid_target``)和一个目录(``regions``)。
+
+如果你把 ``vaddr`` 写到 ``contexts/<N>/operations`` 中，每个目标应该是一个进程。你
+可以通过将进程的pid写到 ``pid_target`` 文件中来指定DAMON的进程。
+
+targets/<N>/regions
+-------------------
+
+当使用 ``vaddr`` 监测操作集时（ ``vaddr`` 被写入 ``contexts/<N>/operations`` 文
+件），DAMON自动设置和更新监测目标区域，这样就可以覆盖目标进程的整个内存映射。然而，用户可
+能希望将初始监测区域设置为特定的地址范围。
+
+相反，当使用 ``paddr`` 监测操作集时，DAMON不会自动设置和更新监测目标区域（ ``paddr``
+被写入 ``contexts/<N>/operations`` 中）。因此，在这种情况下，用户应该自己设置监测目标
+区域。
+
+在这种情况下，用户可以按照自己的意愿明确设置初始监测目标区域，将适当的值写入该目录下的文件。
+
+开始时，这个目录只有一个文件， ``nr_regions`` 。向该文件写入一个数字(``N``)，就可以创
+建名为 ``0`` 到  ``N-1`` 的子目录。每个目录代表每个初始监测目标区域。
+
+regions/<N>/
+------------
+
+在每个区域目录中，你会发现两个文件（ ``start``  和  ``end`` ）。你可以通过向文件写入
+和从文件中读出，分别设置和获得初始监测目标区域的起始和结束地址。
+
+contexts/<N>/schemes/
+---------------------
+
+对于一版的基于DAMON的数据访问感知的内存管理优化，用户通常希望系统对特定访问模式的内存区
+域应用内存管理操作。DAMON从用户那里接收这种形式化的操作方案，并将这些方案应用于目标内存
+区域。用户可以通过读取和写入这个目录下的文件来获得和设置这些方案。
+
+在开始时，这个目录只有一个文件，``nr_schemes``。向该文件写入一个数字(``N``)，就可以
+创建名为``0``到``N-1``的子目录的数量。每个目录代表每个基于DAMON的操作方案。
+
+schemes/<N>/
+------------
+
+在每个方案目录中，存在四个目录(``access_pattern``, ``quotas``,``watermarks``,
+和 ``stats``)和一个文件(``action``)。
+
+``action`` 文件用于设置和获取你想应用于具有特定访问模式的内存区域的动作。可以写入文件
+和从文件中读取的关键词及其含义如下。
+
+ - ``willneed``: 对有 ``MADV_WILLNEED`` 的区域调用 ``madvise()`` 。
+ - ``cold``: 对具有 ``MADV_COLD`` 的区域调用 ``madvise()`` 。
+ - ``pageout``: 为具有 ``MADV_PAGEOUT`` 的区域调用 ``madvise()`` 。
+ - ``hugepage``: 为带有 ``MADV_HUGEPAGE`` 的区域调用 ``madvise()`` 。
+ - ``nohugepage``: 为带有 ``MADV_NOHUGEPAGE`` 的区域调用 ``madvise()``。
+ - ``stat``: 什么都不做，只计算统计数据
+
+schemes/<N>/access_pattern/
+---------------------------
+
+每个基于DAMON的操作方案的目标访问模式由三个范围构成，包括以字节为单位的区域大小、每个
+聚合区间的监测访问次数和区域年龄的聚合区间数。
+
+在 ``access_pattern`` 目录下，存在三个目录（ ``sz``, ``nr_accesses``, 和 ``age`` ），
+每个目录有两个文件（``min`` 和 ``max`` ）。你可以通过向  ``sz``, ``nr_accesses``, 和
+``age``  目录下的 ``min`` 和 ``max`` 文件分别写入和读取来设置和获取给定方案的访问模式。
+
+schemes/<N>/quotas/
+-------------------
+
+每个 ``动作`` 的最佳 ``目标访问模式`` 取决于工作负载，所以不容易找到。更糟糕的是，将某些动作
+的方案设置得过于激进会造成严重的开销。为了避免这种开销，用户可以为每个方案限制时间和大小配额。
+具体来说，用户可以要求DAMON尽量只使用特定的时间（``时间配额``）来应用行动，并且在给定的时间间
+隔（``重置间隔``）内，只对具有目标访问模式的内存区域应用行动，而不使用特定数量（``大小配额``）。
+
+当预计超过配额限制时，DAMON会根据 ``目标访问模式`` 的大小、访问频率和年龄，对找到的内存区域
+进行优先排序。为了进行个性化的优先排序，用户可以为这三个属性设置权重。
+
+在 ``quotas`` 目录下，存在三个文件（``ms``, ``bytes``, ``reset_interval_ms``）和一个
+目录(``weights``)，其中有三个文件(``sz_permil``, ``nr_accesses_permil``, 和
+``age_permil``)。
+
+你可以设置以毫秒为单位的 ``时间配额`` ，以字节为单位的 ``大小配额`` ，以及以毫秒为单位的 ``重
+置间隔`` ，分别向这三个文件写入数值。你还可以通过向 ``weights`` 目录下的三个文件写入数值来设
+置大小、访问频率和年龄的优先权，单位为千分之一。
+
+schemes/<N>/watermarks/
+-----------------------
+
+为了便于根据系统状态激活和停用每个方案，DAMON提供了一个称为水位的功能。该功能接收五个值，称为
+``度量`` 、``间隔`` 、``高`` 、``中`` 、``低`` 。``度量值`` 是指可以测量的系统度量值，如
+自由内存比率。如果系统的度量值 ``高`` 于memoent的高值或 ``低`` 于低值，则该方案被停用。如果
+该值低于 ``中`` ，则该方案被激活。
+
+在水位目录下，存在五个文件(``metric``, ``interval_us``,``high``, ``mid``, and ``low``)
+用于设置每个值。你可以通过向这些文件的写入来分别设置和获取这五个值。
+
+可以写入 ``metric`` 文件的关键词和含义如下。
+
+ - none: 忽略水位
+ - free_mem_rate: 系统的自由内存率（千分比）。
+
+``interval`` 应以微秒为单位写入。
+
+schemes/<N>/stats/
+------------------
+
+DAMON统计每个方案被尝试应用的区域的总数量和字节数，每个方案被成功应用的区域的两个数字，以及
+超过配额限制的总数量。这些统计数据可用于在线分析或调整方案。
+
+可以通过读取 ``stats`` 目录下的文件(``nr_tried``, ``sz_tried``, ``nr_applied``,
+``sz_applied``, 和 ``qt_exceeds``)）分别检索这些统计数据。这些文件不是实时更新的，所以
+你应该要求DAMON sysfs接口通过在相关的 ``kdamonds/<N>/state`` 文件中写入一个特殊的关键字
+``update_schemes_stats`` 来更新统计信息的文件内容。
+
+用例
+~~~~
+
+下面的命令应用了一个方案：”如果一个大小为[4KiB, 8KiB]的内存区域在[10, 20]的聚合时间间隔内
+显示出每一个聚合时间间隔[0, 5]的访问量，请分页该区域。对于分页，每秒最多只能使用10ms，而且每
+秒分页不能超过1GiB。在这一限制下，首先分页出具有较长年龄的内存区域。另外，每5秒钟检查一次系统
+的可用内存率，当可用内存率低于50%时开始监测和分页，但如果可用内存率大于60%，或低于30%，则停
+止监测。“ ::
+
+    # cd <sysfs>/kernel/mm/damon/admin
+    # # populate directories
+    # echo 1 > kdamonds/nr_kdamonds; echo 1 > kdamonds/0/contexts/nr_contexts;
+    # echo 1 > kdamonds/0/contexts/0/schemes/nr_schemes
+    # cd kdamonds/0/contexts/0/schemes/0
+    # # set the basic access pattern and the action
+    # echo 4096 > access_patterns/sz/min
+    # echo 8192 > access_patterns/sz/max
+    # echo 0 > access_patterns/nr_accesses/min
+    # echo 5 > access_patterns/nr_accesses/max
+    # echo 10 > access_patterns/age/min
+    # echo 20 > access_patterns/age/max
+    # echo pageout > action
+    # # set quotas
+    # echo 10 > quotas/ms
+    # echo $((1024*1024*1024)) > quotas/bytes
+    # echo 1000 > quotas/reset_interval_ms
+    # # set watermark
+    # echo free_mem_rate > watermarks/metric
+    # echo 5000000 > watermarks/interval_us
+    # echo 600 > watermarks/high
+    # echo 500 > watermarks/mid
+    # echo 300 > watermarks/low
+
+请注意，我们强烈建议使用用户空间的工具，如 `damo <https://github.com/awslabs/damo>`_ ，
+而不是像上面那样手动读写文件。以上只是一个例子。
 
 debugfs接口
 ===========
@@ -46,7 +317,7 @@ DAMON导出了八个文件, ``attrs``, ``target_ids``, ``init_regions``,
 属性
 ----
 
-用户可以通过读取和写入 ``attrs`` 文件获得和设置 ``采样间隔`` 、 ``聚集间隔`` 、 ``区域更新间隔``
+用户可以通过读取和写入 ``attrs`` 文件获得和设置 ``采样间隔`` 、 ``聚集间隔`` 、 ``更新间隔``
 以及监测目标区域的最小/最大数量。要详细了解监测属性，请参考 `:doc:/vm/damon/design` 。例如，
 下面的命令将这些值设置为5ms、100ms、1000ms、10和1000，然后再次检查::
 
@@ -108,8 +379,8 @@ DAMON导出了八个文件, ``attrs``, ``target_ids``, ``init_regions``,
             1   20      40
             1   50      100" > init_regions
 
-请注意，这只是设置了初始的监测目标区域。在虚拟内存监测的情况下，DAMON会在一个 ``区域更新间隔``
-后自动更新区域的边界。因此，在这种情况下，如果用户不希望更新的话，应该把 ``区域的更新间隔`` 设
+请注意，这只是设置了初始的监测目标区域。在虚拟内存监测的情况下，DAMON会在一个 ``更新间隔``
+后自动更新区域的边界。因此，在这种情况下，如果用户不希望更新的话，应该把 ``更新间隔`` 设
 置得足够大。
 
 
diff --git a/Documentation/translations/zh_CN/dev-tools/gdb-kernel-debugging.rst b/Documentation/translations/zh_CN/dev-tools/gdb-kernel-debugging.rst
new file mode 100644
index 000000000000..17b5ce85a90c
--- /dev/null
+++ b/Documentation/translations/zh_CN/dev-tools/gdb-kernel-debugging.rst
@@ -0,0 +1,167 @@
+.. highlight:: none
+
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/dev-tools/gdb-kernel-debugging.rst
+:Translator: 高超 gao chao <gaochao49@huawei.com>
+
+通过gdb调试内核和模块
+=====================
+
+Kgdb内核调试器、QEMU等虚拟机管理程序或基于JTAG的硬件接口，支持在运行时使用gdb
+调试Linux内核及其模块。Gdb提供了一个强大的python脚本接口，内核也提供了一套
+辅助脚本以简化典型的内核调试步骤。本文档为如何启用和使用这些脚本提供了一个简要的教程。
+此教程基于QEMU/KVM虚拟机，但文中示例也适用于其他gdb stub。
+
+
+环境配置要求
+------------
+
+- gdb 7.2+ (推荐版本: 7.4+) 且开启python支持 (通常发行版上都已支持)
+
+设置
+----
+
+- 创建一个QEMU/KVM的linux虚拟机（详情请参考 www.linux-kvm.org 和 www.qemu.org ）。
+  对于交叉开发，https://landley.net/aboriginal/bin 提供了一些镜像和工具链，
+  可以帮助搭建交叉开发环境。
+
+- 编译内核时开启CONFIG_GDB_SCRIPTS，关闭CONFIG_DEBUG_INFO_REDUCED。
+  如果架构支持CONFIG_FRAME_POINTER，请保持开启。
+
+- 在guest环境上安装该内核。如有必要，通过在内核command line中添加“nokaslr”来关闭KASLR。
+  此外，QEMU允许通过-kernel、-append、-initrd这些命令行选项直接启动内核。
+  但这通常仅在不依赖内核模块时才有效。有关此模式的更多详细信息，请参阅QEMU文档。
+  在这种情况下，如果架构支持KASLR，应该在禁用CONFIG_RANDOMIZE_BASE的情况下构建内核。
+
+- 启用QEMU/KVM的gdb stub，可以通过如下方式实现
+
+    - 在VM启动时，通过在QEMU命令行中添加“-s”参数
+
+  或
+
+    - 在运行时通过从QEMU监视控制台发送“gdbserver”
+
+- 切换到/path/to/linux-build(内核源码编译)目录
+
+- 启动gdb：gdb vmlinux
+
+  注意：某些发行版可能会将gdb脚本的自动加载限制在已知的安全目录中。
+  如果gdb报告拒绝加载vmlinux-gdb.py（相关命令找不到），请将::
+
+    add-auto-load-safe-path /path/to/linux-build
+
+  添加到~/.gdbinit。更多详细信息，请参阅gdb帮助信息。
+
+- 连接到已启动的guest环境::
+
+    (gdb) target remote :1234
+
+
+使用Linux提供的gdb脚本的示例
+----------------------------
+
+- 加载模块（以及主内核）符号::
+
+    (gdb) lx-symbols
+    loading vmlinux
+    scanning for modules in /home/user/linux/build
+    loading @0xffffffffa0020000: /home/user/linux/build/net/netfilter/xt_tcpudp.ko
+    loading @0xffffffffa0016000: /home/user/linux/build/net/netfilter/xt_pkttype.ko
+    loading @0xffffffffa0002000: /home/user/linux/build/net/netfilter/xt_limit.ko
+    loading @0xffffffffa00ca000: /home/user/linux/build/net/packet/af_packet.ko
+    loading @0xffffffffa003c000: /home/user/linux/build/fs/fuse/fuse.ko
+    ...
+    loading @0xffffffffa0000000: /home/user/linux/build/drivers/ata/ata_generic.ko
+
+- 对一些尚未加载的模块中的函数函数设置断点，例如::
+
+    (gdb) b btrfs_init_sysfs
+    Function "btrfs_init_sysfs" not defined.
+    Make breakpoint pending on future shared library load? (y or [n]) y
+    Breakpoint 1 (btrfs_init_sysfs) pending.
+
+- 继续执行::
+
+    (gdb) c
+
+- 加载模块并且能观察到正在加载的符号以及断点命中::
+
+    loading @0xffffffffa0034000: /home/user/linux/build/lib/libcrc32c.ko
+    loading @0xffffffffa0050000: /home/user/linux/build/lib/lzo/lzo_compress.ko
+    loading @0xffffffffa006e000: /home/user/linux/build/lib/zlib_deflate/zlib_deflate.ko
+    loading @0xffffffffa01b1000: /home/user/linux/build/fs/btrfs/btrfs.ko
+
+    Breakpoint 1, btrfs_init_sysfs () at /home/user/linux/fs/btrfs/sysfs.c:36
+    36              btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj);
+
+- 查看内核的日志缓冲区::
+
+    (gdb) lx-dmesg
+    [     0.000000] Initializing cgroup subsys cpuset
+    [     0.000000] Initializing cgroup subsys cpu
+    [     0.000000] Linux version 3.8.0-rc4-dbg+ (...
+    [     0.000000] Command line: root=/dev/sda2 resume=/dev/sda1 vga=0x314
+    [     0.000000] e820: BIOS-provided physical RAM map:
+    [     0.000000] BIOS-e820: [mem 0x0000000000000000-0x000000000009fbff] usable
+    [     0.000000] BIOS-e820: [mem 0x000000000009fc00-0x000000000009ffff] reserved
+    ....
+
+- 查看当前task struct结构体的字段（仅x86和arm64支持）::
+
+    (gdb) p $lx_current().pid
+    $1 = 4998
+    (gdb) p $lx_current().comm
+    $2 = "modprobe\000\000\000\000\000\000\000"
+
+- 对当前或指定的CPU使用per-cpu函数::
+
+    (gdb) p $lx_per_cpu("runqueues").nr_running
+    $3 = 1
+    (gdb) p $lx_per_cpu("runqueues", 2).nr_running
+    $4 = 0
+
+- 使用container_of查看更多hrtimers信息::
+
+    (gdb) set $next = $lx_per_cpu("hrtimer_bases").clock_base[0].active.next
+    (gdb) p *$container_of($next, "struct hrtimer", "node")
+    $5 = {
+      node = {
+        node = {
+          __rb_parent_color = 18446612133355256072,
+          rb_right = 0x0 <irq_stack_union>,
+          rb_left = 0x0 <irq_stack_union>
+        },
+        expires = {
+          tv64 = 1835268000000
+        }
+      },
+      _softexpires = {
+        tv64 = 1835268000000
+      },
+      function = 0xffffffff81078232 <tick_sched_timer>,
+      base = 0xffff88003fd0d6f0,
+      state = 1,
+      start_pid = 0,
+      start_site = 0xffffffff81055c1f <hrtimer_start_range_ns+20>,
+      start_comm = "swapper/2\000\000\000\000\000\000"
+    }
+
+
+命令和辅助调试功能列表
+----------------------
+
+命令和辅助调试功能可能会随着时间的推移而改进，此文显示的是初始版本的部分示例::
+
+ (gdb) apropos lx
+ function lx_current -- Return current task
+ function lx_module -- Find module by name and return the module variable
+ function lx_per_cpu -- Return per-cpu variable
+ function lx_task_by_pid -- Find Linux task by PID and return the task_struct variable
+ function lx_thread_info -- Calculate Linux thread_info from task variable
+ lx-dmesg -- Print Linux kernel log buffer
+ lx-lsmod -- List currently loaded modules
+ lx-symbols -- (Re-)load symbols of Linux kernel and currently loaded modules
+
+可以通过“help <command-name>”或“help function <function-name>”命令
+获取指定命令或指定调试功能的更多详细信息。
diff --git a/Documentation/translations/zh_CN/dev-tools/index.rst b/Documentation/translations/zh_CN/dev-tools/index.rst
index 77a8c44cdf49..02577c379007 100644
--- a/Documentation/translations/zh_CN/dev-tools/index.rst
+++ b/Documentation/translations/zh_CN/dev-tools/index.rst
@@ -25,6 +25,7 @@ Documentation/translations/zh_CN/dev-tools/testing-overview.rst
    sparse
    gcov
    kasan
+   gdb-kernel-debugging
 
 Todolist:
 
@@ -34,7 +35,6 @@ Todolist:
  - kmemleak
  - kcsan
  - kfence
- - gdb-kernel-debugging
  - kgdb
  - kselftest
  - kunit/index
diff --git a/Documentation/translations/zh_CN/devicetree/usage-model.rst b/Documentation/translations/zh_CN/devicetree/usage-model.rst
index 318a3c6a0114..accdc33475a0 100644
--- a/Documentation/translations/zh_CN/devicetree/usage-model.rst
+++ b/Documentation/translations/zh_CN/devicetree/usage-model.rst
@@ -120,24 +120,24 @@ dt_compat列表（如果你好奇，该列表定义在arch/arm/include/asm/mach/
 表示什么。在Documentation/devicetree/bindings中添加兼容字符串的文档。
 
 同样在ARM上，对于每个machine_desc，内核会查看是否有任何dt_compat列表条
-目出现在兼容属性中。如果有，那么该机器_desc就是驱动该机器的候选者。在搜索
+目出现在兼容属性中。如果有，那么该machine_desc就是驱动该机器的候选者。在搜索
 了整个machine_descs表之后，setup_machine_fdt()根据每个machine_desc
 在兼容属性中匹配的条目，返回 “最兼容” 的machine_desc。如果没有找到匹配
 的machine_desc，那么它将返回NULL。
 
 这个方案背后的原因是观察到，在大多数情况下，如果它们都使用相同的SoC或相同
-系列的SoC，一个机器_desc可以支持大量的电路板。然而，不可避免地会有一些例
+系列的SoC，一个machine_desc可以支持大量的电路板。然而，不可避免地会有一些例
 外情况，即特定的板子需要特殊的设置代码，这在一般情况下是没有用的。特殊情况
 可以通过在通用设置代码中明确检查有问题的板子来处理，但如果超过几个情况下，
 这样做很快就会变得很难看和/或无法维护。
 
-相反，兼容列表允许通用机器_desc通过在dt_compat列表中指定“不太兼容”的值
+相反，兼容列表允许通用machine_desc通过在dt_compat列表中指定“不太兼容”的值
 来提供对广泛的通用板的支持。在上面的例子中，通用板支持可以声称与“ti,ompa3”
 或“ti,ompa3450”兼容。如果在最初的beagleboard上发现了一个bug，需要在
 早期启动时使用特殊的变通代码，那么可以添加一个新的machine_desc，实现变通，
 并且只在“ti,omap3-beagleboard”上匹配。
 
-PowerPC使用了一个稍微不同的方案，它从每个机器_desc中调用.probe()钩子，
+PowerPC使用了一个稍微不同的方案，它从每个machine_desc中调用.probe()钩子，
 并使用第一个返回TRUE的钩子。然而，这种方法没有考虑到兼容列表的优先级，对于
 新的架构支持可能应该避免。
 
diff --git a/Documentation/translations/zh_CN/index.rst b/Documentation/translations/zh_CN/index.rst
index 88d8df957a78..ac32d8e306ac 100644
--- a/Documentation/translations/zh_CN/index.rst
+++ b/Documentation/translations/zh_CN/index.rst
@@ -108,6 +108,7 @@ TODOList:
    :maxdepth: 2
 
    core-api/index
+   locking/index
    accounting/index
    cpu-freq/index
    iio/index
@@ -123,7 +124,6 @@ TODOList:
 TODOList:
 
 * driver-api/index
-* locking/index
 * block/index
 * cdrom/index
 * ide/index
diff --git a/Documentation/translations/zh_CN/locking/index.rst b/Documentation/translations/zh_CN/locking/index.rst
new file mode 100644
index 000000000000..700df8a2bb70
--- /dev/null
+++ b/Documentation/translations/zh_CN/locking/index.rst
@@ -0,0 +1,42 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/locking/index.rst
+
+:翻译:
+
+  唐艺舟 Tang Yizhou <tangyeechou@gmail.com>
+
+==
+锁
+==
+
+.. toctree::
+    :maxdepth: 1
+
+TODOList:
+
+    * locktypes
+    * lockdep-design
+    * lockstat
+    * locktorture
+    * mutex-design
+    * rt-mutex-design
+    * rt-mutex
+    * seqlock
+    * spinlocks
+    * ww-mutex-design
+    * preempt-locking
+    * pi-futex
+    * futex-requeue-pi
+    * hwspinlock
+    * percpu-rw-semaphore
+    * robust-futexes
+    * robust-futex-ABI
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/translations/zh_CN/locking/spinlocks.rst b/Documentation/translations/zh_CN/locking/spinlocks.rst
new file mode 100644
index 000000000000..2017c01f0a4b
--- /dev/null
+++ b/Documentation/translations/zh_CN/locking/spinlocks.rst
@@ -0,0 +1,149 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/locking/spinlocks.rst
+
+:翻译:
+
+  唐艺舟 Tang Yizhou <tangyeechou@gmail.com>
+
+==========
+加锁的教训
+==========
+
+教训 1：自旋锁
+==============
+
+加锁最基本的原语是自旋锁（spinlock）::
+
+  static DEFINE_SPINLOCK(xxx_lock);
+
+	unsigned long flags;
+
+	spin_lock_irqsave(&xxx_lock, flags);
+	... 这里是临界区 ..
+	spin_unlock_irqrestore(&xxx_lock, flags);
+
+上述代码总是安全的。自旋锁将在 _本地_ 禁用中断，但它本身将保证全局锁定。所以它
+将保证在该锁保护的区域内只有一个控制线程。即使在单处理器（UP）下也能很好的工作，
+所以代码 _不_ 需要担心UP还是SMP的问题：自旋锁在两种情况下都能正常工作。
+
+   注意！自旋锁对内存的潜在影响由下述文档进一步描述：
+
+     Documentation/memory-barriers.txt
+
+       (5) ACQUIRE operations.
+
+       (6) RELEASE operations.
+
+上述代码通常非常简单（对大部分情况，你通常需要并且只希望有一个自旋锁——使用多个
+自旋锁会使事情变得更复杂，甚至更慢，而且通常仅仅在你 **理解的** 序列有被拆分的
+需求时才值得这么做：如果你不确定的话，请不惜一切代价避免这样做）。
+
+这是关于自旋锁的唯一真正困难的部分：一旦你开始使用自旋锁，它们往往会扩展到你以前
+可能没有注意到的领域，因为你必须确保自旋锁正确地保护共享数据结构 **每一处** 被
+使用的地方。自旋锁是最容易被添加到完全独立于其它代码的地方（例如，没有人访问的
+内部驱动数据结构）的。
+
+   注意！仅当你在跨CPU核访问时使用 **同一把** 自旋锁，对它的使用才是安全的。
+   这意味着所有访问共享变量的代码必须对它们想使用的自旋锁达成一致。
+
+----
+
+教训 2：读-写自旋锁
+===================
+
+如果你的数据访问有一个非常自然的模式，倾向于从共享变量中读取数据，读-写自旋锁
+（rw_lock）有时是有用的。它们允许多个读者同时出现在同一个临界区，但是如果有人想
+改变变量，它必须获得一个独占的写锁。
+
+   注意！读-写自旋锁比原始自旋锁需要更多的原子内存操作。除非读者的临界区很长，
+   否则你最好只使用原始自旋锁。
+
+例程看起来和上面一样::
+
+   rwlock_t xxx_lock = __RW_LOCK_UNLOCKED(xxx_lock);
+
+	unsigned long flags;
+
+	read_lock_irqsave(&xxx_lock, flags);
+	.. 仅读取信息的临界区 ...
+	read_unlock_irqrestore(&xxx_lock, flags);
+
+	write_lock_irqsave(&xxx_lock, flags);
+	.. 读取和独占写信息 ...
+	write_unlock_irqrestore(&xxx_lock, flags);
+
+上面这种锁对于复杂的数据结构如链表可能会有用，特别是在不改变链表的情况下搜索其中
+的条目。读锁允许许多并发的读者。任何希望 **修改** 链表的代码将必须先获取写锁。
+
+   注意！RCU锁更适合遍历链表，但需要仔细注意设计细节（见Documentation/RCU/listRCU.rst）。
+
+另外，你不能把读锁“升级”为写锁，所以如果你在 _任何_ 时候需要做任何修改
+（即使你不是每次都这样做），你必须在一开始就获得写锁。
+
+   注意！我们正在努力消除大多数情况下的读-写自旋锁的使用，所以请不要在没有达成
+   共识的情况下增加一个新的（相反，请参阅Documentation/RCU/rcu.rst以获得完整
+   信息）。
+
+----
+
+教训 3：重新审视自旋锁
+======================
+
+上述的自旋锁原语绝不是唯一的。它们是最安全的，在所有情况下都能正常工作，但部分
+**因为** 它们是安全的，它们也是相当慢的。它们比原本需要的更慢，因为它们必须要
+禁用中断（在X86上只是一条指令，但却是一条昂贵的指令——而在其他体系结构上，情况
+可能更糟）。
+
+如果你有必须保护跨CPU访问的数据结构且你想使用自旋锁的场景，你有可能使用代价小的
+自旋锁版本。当且仅当你知道某自旋锁永远不会在中断处理程序中使用，你可以使用非中断
+的版本::
+
+	spin_lock(&lock);
+	...
+	spin_unlock(&lock);
+
+（当然，也可以使用相应的读-写锁版本）。这种自旋锁将同样可以保证独占访问，而且
+速度会快很多。如果你知道有关的数据只在“进程上下文”中被存取，即，不涉及中断，
+这种自旋锁就有用了。
+
+当这些版本的自旋锁涉及中断时，你不能使用的原因是会陷入死锁::
+
+	spin_lock(&lock);
+	...
+		<- 中断来临：
+			spin_lock(&lock);
+
+一个中断试图对一个已经锁定的变量上锁。如果中断发生在另一个CPU上，不会有问题；
+但如果中断发生在已经持有自旋锁的同一个CPU上，将 _会_ 有问题，因为该锁显然永远
+不会被释放（因为中断正在等待该锁，而锁的持有者被中断打断，并且无法继续执行，
+直到中断处理结束）。
+
+（这也是自旋锁的中断版本只需要禁用 _本地_ 中断的原因——在发生于其它CPU的中断中
+使用同一把自旋锁是没问题的，因为发生于其它CPU的中断不会打断已经持锁的CPU，所以
+锁的持有者可以继续执行并最终释放锁）。
+
+		Linus
+
+----
+
+参考信息
+========
+
+对于动态初始化，使用spin_lock_init()或rwlock_init()是合适的::
+
+   spinlock_t xxx_lock;
+   rwlock_t xxx_rw_lock;
+
+   static int __init xxx_init(void)
+   {
+	spin_lock_init(&xxx_lock);
+	rwlock_init(&xxx_rw_lock);
+	...
+   }
+
+   module_init(xxx_init);
+
+对于静态初始化，使用DEFINE_SPINLOCK() / DEFINE_RWLOCK()或
+__SPIN_LOCK_UNLOCKED() / __RW_LOCK_UNLOCKED()是合适的。
diff --git a/Documentation/translations/zh_CN/process/howto.rst b/Documentation/translations/zh_CN/process/howto.rst
index 2903d7161bc8..1334cdb32a3c 100644
--- a/Documentation/translations/zh_CN/process/howto.rst
+++ b/Documentation/translations/zh_CN/process/howto.rst
@@ -252,7 +252,7 @@ Linux-next 集成测试树
 在将子系统树的更新合并到主线树之前，需要对它们进行集成测试。为此，存在一个
 特殊的测试存储库，其中几乎每天都会提取所有子系统树：
 
-        https://git.kernel.org/？p=linux/kernel/git/next/linux-next.git
+        https://git.kernel.org/?p=linux/kernel/git/next/linux-next.git
 
 通过这种方式，Linux-next 对下一个合并阶段将进入主线内核的内容给出了一个概要
 展望。非常欢冒险的测试者运行测试Linux-next。
diff --git a/Documentation/translations/zh_CN/scheduler/index.rst b/Documentation/translations/zh_CN/scheduler/index.rst
index 12bf3bd02ccf..a8eaa7325f54 100644
--- a/Documentation/translations/zh_CN/scheduler/index.rst
+++ b/Documentation/translations/zh_CN/scheduler/index.rst
@@ -25,8 +25,10 @@ Linux调度器
     sched-domains
     sched-capacity
     sched-energy
+    schedutil
     sched-nice-design
     sched-stats
+    sched-debug
 
 TODOList:
 
diff --git a/Documentation/translations/zh_CN/scheduler/sched-debug.rst b/Documentation/translations/zh_CN/scheduler/sched-debug.rst
new file mode 100644
index 000000000000..5e17740c2bf3
--- /dev/null
+++ b/Documentation/translations/zh_CN/scheduler/sched-debug.rst
@@ -0,0 +1,51 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/scheduler/sched-debug.rst
+
+:翻译:
+
+  唐艺舟 Tang Yizhou <tangyeechou@gmail.com>
+
+=============
+调度器debugfs
+=============
+
+用配置项CONFIG_SCHED_DEBUG=y启动内核后，将可以访问/sys/kernel/debug/sched
+下的调度器专用调试文件。其中一些文件描述如下。
+
+numa_balancing
+==============
+
+`numa_balancing` 目录用来存放控制非统一内存访问（NUMA）平衡特性的相关文件。
+如果该特性导致系统负载太高，那么可以通过 `scan_period_min_ms, scan_delay_ms,
+scan_period_max_ms, scan_size_mb` 文件控制NUMA缺页的内核采样速率。
+
+
+scan_period_min_ms, scan_delay_ms, scan_period_max_ms, scan_size_mb
+-------------------------------------------------------------------
+
+自动NUMA平衡会扫描任务地址空间，检测页面是否被正确放置，或者数据是否应该被
+迁移到任务正在运行的本地内存结点，此时需解映射页面。每个“扫描延迟”（scan delay）
+时间之后，任务扫描其地址空间中下一批“扫描大小”（scan size）个页面。若抵达
+内存地址空间末尾，扫描器将从头开始重新扫描。
+
+结合来看，“扫描延迟”和“扫描大小”决定扫描速率。当“扫描延迟”减小时，扫描速率
+增加。“扫描延迟”和每个任务的扫描速率都是自适应的，且依赖历史行为。如果页面被
+正确放置，那么扫描延迟就会增加；否则扫描延迟就会减少。“扫描大小”不是自适应的，
+“扫描大小”越大，扫描速率越高。
+
+更高的扫描速率会产生更高的系统开销，因为必须捕获缺页异常，并且潜在地必须迁移
+数据。然而，当扫描速率越高，若工作负载模式发生变化，任务的内存将越快地迁移到
+本地结点，由于远程内存访问而产生的性能影响将降到最低。下面这些文件控制扫描延迟
+的阈值和被扫描的页面数量。
+
+``scan_period_min_ms`` 是扫描一个任务虚拟内存的最小时间，单位是毫秒。它有效地
+控制了每个任务的最大扫描速率。
+
+``scan_delay_ms`` 是一个任务初始化创建（fork）时，第一次使用的“扫描延迟”。
+
+``scan_period_max_ms`` 是扫描一个任务虚拟内存的最大时间，单位是毫秒。它有效地
+控制了每个任务的最小扫描速率。
+
+``scan_size_mb`` 是一次特定的扫描中，要扫描多少兆字节（MB）对应的页面数。
diff --git a/Documentation/translations/zh_CN/scheduler/schedutil.rst b/Documentation/translations/zh_CN/scheduler/schedutil.rst
new file mode 100644
index 000000000000..d1ea68007520
--- /dev/null
+++ b/Documentation/translations/zh_CN/scheduler/schedutil.rst
@@ -0,0 +1,165 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/scheduler/schedutil.rst
+
+:翻译:
+
+  唐艺舟 Tang Yizhou <tangyeechou@gmail.com>
+
+=========
+Schedutil
+=========
+
+.. note::
+
+   本文所有内容都假设频率和工作算力之间存在线性关系。我们知道这是有瑕疵的，
+   但这是最可行的近似处理。
+
+PELT（实体负载跟踪，Per Entity Load Tracking）
+==============================================
+
+通过PELT，我们跟踪了各种调度器实体的一些指标，从单个任务到任务组分片到CPU
+运行队列。我们使用指数加权移动平均数（Exponentially Weighted Moving Average，
+EWMA）作为其基础，每个周期（1024us）都会衰减，衰减速率满足y^32 = 0.5。
+也就是说，最近的32ms贡献负载的一半，而历史上的其它时间则贡献另一半。
+
+具体而言：
+
+  ewma_sum(u) := u_0 + u_1*y + u_2*y^2 + ...
+
+  ewma(u) = ewma_sum(u) / ewma_sum(1)
+
+由于这本质上是一个无限几何级数的累加，结果是可组合的，即ewma(A) + ewma(B) = ewma(A+B)。
+这个属性是关键，因为它提供了在任务迁移时重新组合平均数的能力。
+
+请注意，阻塞态的任务仍然对累加值（任务组分片和CPU运行队列）有贡献，这反映了
+它们在恢复运行后的预期贡献。
+
+利用这一点，我们跟踪2个关键指标：“运行”和“可运行”。“运行”反映了一个调度实体
+在CPU上花费的时间，而“可运行”反映了一个调度实体在运行队列中花费的时间。当只有
+一个任务时，这两个指标是相同的，但一旦出现对CPU的争用，“运行”将减少以反映每个
+任务在CPU上花费的时间，而“可运行”将增加以反映争用的激烈程度。
+
+更多细节见：kernel/sched/pelt.c
+
+
+频率 / CPU不变性
+================
+
+因为CPU频率在1GHz时利用率为50%和CPU频率在2GHz时利用率为50%是不一样的，同样
+在小核上运行时利用率为50%和在大核上运行时利用率为50%是不一样的，我们允许架构
+以两个比率来伸缩时间差，其中一个是动态电压频率升降（Dynamic Voltage and
+Frequency Scaling，DVFS）比率，另一个是微架构比率。
+
+对于简单的DVFS架构（软件有完全控制能力），我们可以很容易地计算该比率为::
+
+            f_cur
+  r_dvfs := -----
+            f_max
+
+对于由硬件控制DVFS的更多动态系统，我们使用硬件计数器（Intel APERF/MPERF，
+ARMv8.4-AMU）来计算这一比率。具体到Intel，我们使用::
+
+           APERF
+  f_cur := ----- * P0
+           MPERF
+
+             4C-turbo;  如果可用并且使能了turbo
+  f_max := { 1C-turbo;  如果使能了turbo
+             P0;        其它情况
+
+                    f_cur
+  r_dvfs := min( 1, ----- )
+                    f_max
+
+我们选择4C turbo而不是1C turbo，以使其更持久性略微更强。
+
+r_cpu被定义为当前CPU的最高性能水平与系统中任何其它CPU的最高性能水平的比率。
+
+  r_tot = r_dvfs * r_cpu
+
+其结果是，上述“运行”和“可运行”的指标变成DVFS无关和CPU型号无关了。也就是说，
+我们可以在CPU之间转移和比较它们。
+
+更多细节见:
+
+ - kernel/sched/pelt.h:update_rq_clock_pelt()
+ - arch/x86/kernel/smpboot.c:"APERF/MPERF frequency ratio computation."
+ - Documentation/translations/zh_CN/scheduler/sched-capacity.rst:"1. CPU Capacity + 2. Task utilization"
+
+
+UTIL_EST / UTIL_EST_FASTUP
+==========================
+
+由于周期性任务的平均数在睡眠时会衰减，而在运行时其预期利用率会和睡眠前相同，
+因此它们在再次运行后会面临（DVFS）的上涨。
+
+为了缓解这个问题，（一个默认使能的编译选项）UTIL_EST驱动一个无限脉冲响应
+（Infinite Impulse Response，IIR）的EWMA，“运行”值在出队时是最高的。
+另一个默认使能的编译选项UTIL_EST_FASTUP修改了IIR滤波器，使其允许立即增加，
+仅在利用率下降时衰减。
+
+进一步，运行队列的（可运行任务的）利用率之和由下式计算：
+
+  util_est := \Sum_t max( t_running, t_util_est_ewma )
+
+更多细节见: kernel/sched/fair.c:util_est_dequeue()
+
+
+UCLAMP
+======
+
+可以在每个CFS或RT任务上设置有效的u_min和u_max clamp值（译注：clamp可以理解
+为类似滤波器的能力，它定义了有效取值范围的最大值和最小值）；运行队列为所有正在
+运行的任务保持这些clamp的最大聚合值。
+
+更多细节见: include/uapi/linux/sched/types.h
+
+
+Schedutil / DVFS
+================
+
+每当调度器的负载跟踪被更新时（任务唤醒、任务迁移、时间流逝），我们都会调用
+schedutil来更新硬件DVFS状态。
+
+其基础是CPU运行队列的“运行”指标，根据上面的内容，它是CPU的频率不变的利用率
+估计值。由此我们计算出一个期望的频率，如下::
+
+             max( running, util_est );  如果使能UTIL_EST
+  u_cfs := { running;                   其它情况
+
+               clamp( u_cfs + u_rt, u_min, u_max );  如果使能UCLAMP_TASK
+  u_clamp := { u_cfs + u_rt;                         其它情况
+
+  u := u_clamp + u_irq + u_dl;		[估计值。更多细节见源代码]
+
+  f_des := min( f_max, 1.25 u * f_max )
+
+关于IO-wait的说明：当发生更新是因为任务从IO完成中唤醒时，我们提升上面的“u”。
+
+然后，这个频率被用来选择一个P-state或OPP，或者直接混入一个发给硬件的CPPC式
+请求。
+
+关于截止期限调度器的说明: 截止期限任务（偶发任务模型）使我们能够计算出满足
+工作负荷所需的硬f_min值。
+
+因为这些回调函数是直接来自调度器的，所以DVFS的硬件交互应该是“快速”和非阻塞的。
+在硬件交互缓慢和昂贵的时候，schedutil支持DVFS请求限速，不过会降低效率。
+
+更多信息见: kernel/sched/cpufreq_schedutil.c
+
+
+注意
+====
+
+ - 在低负载场景下，DVFS是最相关的，“运行”的值将密切反映利用率。
+
+ - 在负载饱和的场景下，任务迁移会导致一些瞬时性的使用率下降。假设我们有一个
+   CPU，有4个任务占用导致其饱和，接下来我们将一个任务迁移到另一个空闲CPU上，
+   旧的CPU的“运行”值将为0.75，而新的CPU将获得0.25。这是不可避免的，而且随着
+   时间流逝将自动修正。另注，由于没有空闲时间，我们还能保证f_max值吗？
+
+ - 上述大部分内容是关于避免DVFS下滑，以及独立的DVFS域发生负载迁移时不得不
+   重新学习/提升频率。
+
diff --git a/Documentation/translations/zh_CN/vm/damon/design.rst b/Documentation/translations/zh_CN/vm/damon/design.rst
index 05f66c02740a..46128b77c2b3 100644
--- a/Documentation/translations/zh_CN/vm/damon/design.rst
+++ b/Documentation/translations/zh_CN/vm/damon/design.rst
@@ -77,7 +77,7 @@ DAMON目前为物理和虚拟地址空间提供了基元的实现。下面两个
 ========================
 
 下面四个部分分别描述了DAMON的核心机制和五个监测属性，即 ``采样间隔`` 、 ``聚集间隔`` 、
-``区域更新间隔`` 、 ``最小区域数`` 和 ``最大区域数`` 。
+``更新间隔`` 、 ``最小区域数`` 和 ``最大区域数`` 。
 
 
 访问频率监测
@@ -135,5 +135,6 @@ DAMON的输出显示了在给定的时间内哪些页面的访问频率是多少
 监测目标地址范围可以动态改变。例如，虚拟内存可以动态地被映射和解映射。物理内存可以被
 热插拔。
 
-由于在某些情况下变化可能相当频繁，DAMON检查动态内存映射的变化，并仅在用户指定的时间
-间隔（ ``区域更新间隔`` ）内将其应用于抽象的目标区域。
+由于在某些情况下变化可能相当频繁，DAMON允许监控操作检查动态变化，包括内存映射变化，
+并仅在用户指定的时间间隔（ ``更新间隔`` ）中的每个时间段，将其应用于监控操作相关的
+数据结构，如抽象的监控目标内存区。
+\ No newline at end of file
diff --git a/Documentation/translations/zh_CN/vm/frontswap.rst b/Documentation/translations/zh_CN/vm/frontswap.rst
new file mode 100644
index 000000000000..3eb07870e2ef
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/frontswap.rst
@@ -0,0 +1,196 @@
+:Original: Documentation/vm/_free_page_reporting.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+=========
+Frontswap
+=========
+
+Frontswap为交换页提供了一个 “transcendent memory” 的接口。在一些环境中，由
+于交换页被保存在RAM（或类似RAM的设备）中，而不是交换磁盘，因此可以获得巨大的性能
+节省（提高）。
+
+.. _Transcendent memory in a nutshell: https://lwn.net/Articles/454795/
+
+Frontswap之所以这么命名，是因为它可以被认为是与swap设备的“back”存储相反。存
+储器被认为是一个同步并发安全的面向页面的“伪RAM设备”，符合transcendent memory
+（如Xen的“tmem”，或内核内压缩内存，又称“zcache”，或未来的类似RAM的设备）的要
+求；这个伪RAM设备不能被内核直接访问或寻址，其大小未知且可能随时间变化。驱动程序通过
+调用frontswap_register_ops将自己与frontswap链接起来，以适当地设置frontswap_ops
+的功能，它提供的功能必须符合某些策略，如下所示:
+
+一个 “init” 将设备准备好接收与指定的交换设备编号（又称“类型”）相关的frontswap
+交换页。一个 “store” 将把该页复制到transcendent memory，并与该页的类型和偏移
+量相关联。一个 “load” 将把该页，如果找到的话，从transcendent memory复制到内核
+内存，但不会从transcendent memory中删除该页。一个 “invalidate_page” 将从
+transcendent memory中删除该页，一个 “invalidate_area” 将删除所有与交换类型
+相关的页（例如，像swapoff）并通知 “device” 拒绝进一步存储该交换类型。
+
+一旦一个页面被成功存储，在该页面上的匹配加载通常会成功。因此，当内核发现自己处于需
+要交换页面的情况时，它首先尝试使用frontswap。如果存储的结果是成功的，那么数据就已
+经成功的保存到了transcendent memory中，并且避免了磁盘写入，如果后来再读回数据，
+也避免了磁盘读取。如果存储返回失败，transcendent memory已经拒绝了该数据，且该页
+可以像往常一样被写入交换空间。
+
+请注意，如果一个页面被存储，而该页面已经存在于transcendent memory中（一个 “重复”
+的存储），要么存储成功，数据被覆盖，要么存储失败，该页面被废止。这确保了旧的数据永远
+不会从frontswap中获得。
+
+如果配置正确，对frontswap的监控是通过 `/sys/kernel/debug/frontswap` 目录下的
+debugfs完成的。frontswap的有效性可以通过以下方式测量（在所有交换设备中）:
+
+``failed_stores``
+	有多少次存储的尝试是失败的
+
+``loads``
+	尝试了多少次加载（应该全部成功）
+
+``succ_stores``
+	有多少次存储的尝试是成功的
+
+``invalidates``
+	尝试了多少次作废
+
+后台实现可以提供额外的指标。
+
+经常问到的问题
+==============
+
+* 价值在哪里?
+
+当一个工作负载开始交换时，性能就会下降。Frontswap通过提供一个干净的、动态的接口来
+读取和写入交换页到 “transcendent memory”，从而大大增加了许多这样的工作负载的性
+能，否则内核是无法直接寻址的。当数据被转换为不同的形式和大小（比如压缩）或者被秘密
+移动（对于一些类似RAM的设备来说，这可能对写平衡很有用）时，这个接口是理想的。交换
+页（和被驱逐的页面缓存页）是这种比RAM慢但比磁盘快得多的“伪RAM设备”的一大用途。
+
+Frontswap对内核的影响相当小，为各种系统配置中更动态、更灵活的RAM利用提供了巨大的
+灵活性：
+
+在单一内核的情况下，又称“zcache”，页面被压缩并存储在本地内存中，从而增加了可以安
+全保存在RAM中的匿名页面总数。Zcache本质上是用压缩/解压缩的CPU周期换取更好的内存利
+用率。Benchmarks测试显示，当内存压力较低时，几乎没有影响，而在高内存压力下的一些
+工作负载上，则有明显的性能改善（25%以上）。
+
+“RAMster” 在zcache的基础上增加了对集群系统的 “peer-to-peer” transcendent memory
+的支持。Frontswap页面像zcache一样被本地压缩，但随后被“remotified” 到另一个系
+统的RAM。这使得RAM可以根据需要动态地来回负载平衡，也就是说，当系统A超载时，它可以
+交换到系统B，反之亦然。RAMster也可以被配置成一个内存服务器，因此集群中的许多服务器
+可以根据需要动态地交换到配置有大量内存的单一服务器上......而不需要预先配置每个客户
+有多少内存可用
+
+在虚拟情况下，虚拟化的全部意义在于统计地将物理资源在多个虚拟机的不同需求之间进行复
+用。对于RAM来说，这真的很难做到，而且在不改变内核的情况下，要做好这一点的努力基本上
+是失败的（除了一些广为人知的特殊情况下的工作负载）。具体来说，Xen Transcendent Memory
+后端允许管理器拥有的RAM “fallow”，不仅可以在多个虚拟机之间进行“time-shared”，
+而且页面可以被压缩和重复利用，以优化RAM的利用率。当客户操作系统被诱导交出未充分利用
+的RAM时（如 “selfballooning”），突然出现的意外内存压力可能会导致交换；frontswap
+允许这些页面被交换到管理器RAM中或从管理器RAM中交换（如果整体主机系统内存条件允许），
+从而减轻计划外交换可能带来的可怕的性能影响。
+
+一个KVM的实现正在进行中，并且已经被RFC'ed到lkml。而且，利用frontswap，对NVM作为
+内存扩展技术的调查也在进行中。
+
+* 当然，在某些情况下可能有性能上的优势，但frontswap的空间/时间开销是多少？
+
+如果 CONFIG_FRONTSWAP 被禁用，每个 frontswap 钩子都会编译成空，唯一的开销是每
+个 swapon'ed swap 设备的几个额外字节。如果 CONFIG_FRONTSWAP 被启用，但没有
+frontswap的 “backend” 寄存器，每读或写一个交换页就会有一个额外的全局变量，而不
+是零。如果 CONFIG_FRONTSWAP 被启用，并且有一个frontswap的backend寄存器，并且
+后端每次 “store” 请求都失败（即尽管声称可能，但没有提供内存），CPU 的开销仍然可以
+忽略不计 - 因为每次frontswap失败都是在交换页写到磁盘之前，系统很可能是 I/O 绑定
+的，无论如何使用一小部分的 CPU 都是不相关的。
+
+至于空间，如果CONFIG_FRONTSWAP被启用，并且有一个frontswap的backend注册，那么
+每个交换设备的每个交换页都会被分配一个比特。这是在内核已经为每个交换设备的每个交换
+页分配的8位（在2.6.34之前是16位）上增加的。(Hugh Dickins观察到，frontswap可能
+会偷取现有的8个比特，但是我们以后再来担心这个小的优化问题)。对于标准的4K页面大小的
+非常大的交换盘（这很罕见），这是每32GB交换盘1MB开销。
+
+当交换页存储在transcendent memory中而不是写到磁盘上时，有一个副作用，即这可能会
+产生更多的内存压力，有可能超过其他的优点。一个backend，比如zcache，必须实现策略
+来仔细（但动态地）管理内存限制，以确保这种情况不会发生。
+
+* 好吧，那就用内核骇客能理解的术语来快速概述一下这个frontswap补丁的作用如何？
+
+我们假设在内核初始化过程中，一个frontswap 的 “backend” 已经注册了；这个注册表
+明这个frontswap 的 “backend” 可以访问一些不被内核直接访问的“内存”。它到底提
+供了多少内存是完全动态和随机的。
+
+每当一个交换设备被交换时，就会调用frontswap_init()，把交换设备的编号（又称“类
+型”）作为一个参数传给它。这就通知了frontswap，以期待 “store” 与该号码相关的交
+换页的尝试。
+
+每当交换子系统准备将一个页面写入交换设备时（参见swap_writepage()），就会调用
+frontswap_store。Frontswap与frontswap backend协商，如果backend说它没有空
+间，frontswap_store返回-1，内核就会照常把页换到交换设备上。注意，来自frontswap
+backend的响应对内核来说是不可预测的；它可能选择从不接受一个页面，可能接受每九个
+页面，也可能接受每一个页面。但是如果backend确实接受了一个页面，那么这个页面的数
+据已经被复制并与类型和偏移量相关联了，而且backend保证了数据的持久性。在这种情况
+下，frontswap在交换设备的“frontswap_map” 中设置了一个位，对应于交换设备上的
+页面偏移量，否则它就会将数据写入该设备。
+
+当交换子系统需要交换一个页面时（swap_readpage()），它首先调用frontswap_load()，
+检查frontswap_map，看这个页面是否早先被frontswap backend接受。如果是，该页
+的数据就会从frontswap后端填充，换入就完成了。如果不是，正常的交换代码将被执行，
+以便从真正的交换设备上获得这一页的数据。
+
+所以每次frontswap backend接受一个页面时，交换设备的读取和（可能）交换设备的写
+入都被 “frontswap backend store” 和（可能）“frontswap backend loads”
+所取代，这可能会快得多。
+
+* frontswap不能被配置为一个 “特殊的” 交换设备，它的优先级要高于任何真正的交换
+  设备（例如像zswap，或者可能是swap-over-nbd/NFS）？
+
+首先，现有的交换子系统不允许有任何种类的交换层次结构。也许它可以被重写以适应层次
+结构，但这将需要相当大的改变。即使它被重写，现有的交换子系统也使用了块I/O层，它
+假定交换设备是固定大小的，其中的任何页面都是可线性寻址的。Frontswap几乎没有触
+及现有的交换子系统，而是围绕着块I/O子系统的限制，提供了大量的灵活性和动态性。
+
+例如，frontswap backend对任何交换页的接受是完全不可预测的。这对frontswap backend
+的定义至关重要，因为它赋予了backend完全动态的决定权。在zcache中，人们无法预
+先知道一个页面的可压缩性如何。可压缩性 “差” 的页面会被拒绝，而 “差” 本身也可
+以根据当前的内存限制动态地定义。
+
+此外，frontswap是完全同步的，而真正的交换设备，根据定义，是异步的，并且使用
+块I/O。块I/O层不仅是不必要的，而且可能进行 “优化”，这对面向RAM的设备来说是
+不合适的，包括将一些页面的写入延迟相当长的时间。同步是必须的，以确保后端的动
+态性，并避免棘手的竞争条件，这将不必要地大大增加frontswap和/或块I/O子系统的
+复杂性。也就是说，只有最初的 “store” 和 “load” 操作是需要同步的。一个独立
+的异步线程可以自由地操作由frontswap存储的页面。例如，RAMster中的 “remotification”
+线程使用标准的异步内核套接字，将压缩的frontswap页面移动到远程机器。同样，
+KVM的客户方实现可以进行客户内压缩，并使用 “batched” hypercalls。
+
+在虚拟化环境中，动态性允许管理程序（或主机操作系统）做“intelligent overcommit”。
+例如，它可以选择只接受页面，直到主机交换可能即将发生，然后强迫客户机做他们
+自己的交换。
+
+transcendent memory规格的frontswap有一个坏处。因为任何 “store” 都可
+能失败，所以必须在一个真正的交换设备上有一个真正的插槽来交换页面。因此，
+frontswap必须作为每个交换设备的 “影子” 来实现，它有可能容纳交换设备可能
+容纳的每一个页面，也有可能根本不容纳任何页面。这意味着frontswap不能包含比
+swap设备总数更多的页面。例如，如果在某些安装上没有配置交换设备，frontswap
+就没有用。无交换设备的便携式设备仍然可以使用frontswap，但是这种设备的
+backend必须配置某种 “ghost” 交换设备，并确保它永远不会被使用。
+
+
+* 为什么会有这种关于 “重复存储” 的奇怪定义？如果一个页面以前被成功地存储过，
+  难道它不能总是被成功地覆盖吗？
+
+几乎总是可以的，不，有时不能。考虑一个例子，数据被压缩了，原来的4K页面被压
+缩到了1K。现在，有人试图用不可压缩的数据覆盖该页，因此会占用整个4K。但是
+backend没有更多的空间了。在这种情况下，这个存储必须被拒绝。每当frontswap
+拒绝一个会覆盖的存储时，它也必须使旧的数据作废，并确保它不再被访问。因为交
+换子系统会把新的数据写到读交换设备上，这是确保一致性的正确做法。
+
+* 为什么frontswap补丁会创建新的头文件swapfile.h？
+
+frontswap代码依赖于一些swap子系统内部的数据结构，这些数据结构多年来一直
+在静态和全局之间来回移动。这似乎是一个合理的妥协：将它们定义为全局，但在一
+个新的包含文件中声明它们，该文件不被包含swap.h的大量源文件所包含。
+
+Dan Magenheimer，最后更新于2012年4月9日
diff --git a/Documentation/translations/zh_CN/vm/hmm.rst b/Documentation/translations/zh_CN/vm/hmm.rst
new file mode 100644
index 000000000000..2379df95aa58
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/hmm.rst
@@ -0,0 +1,361 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/vm/hmm.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+==================
+异构内存管理 (HMM)
+==================
+
+提供基础设施和帮助程序以将非常规内存（设备内存，如板上 GPU 内存）集成到常规内核路径中，其
+基石是此类内存的专用struct page（请参阅本文档的第 5 至 7 节）。
+
+HMM 还为 SVM（共享虚拟内存）提供了可选的帮助程序，即允许设备透明地访问与 CPU 一致的程序
+地址，这意味着 CPU 上的任何有效指针也是该设备的有效指针。这对于简化高级异构计算的使用变得
+必不可少，其中 GPU、DSP 或 FPGA 用于代表进程执行各种计算。
+
+本文档分为以下部分：在第一部分中，我揭示了与使用特定于设备的内存分配器相关的问题。在第二
+部分中，我揭示了许多平台固有的硬件限制。第三部分概述了 HMM 设计。第四部分解释了 CPU 页
+表镜像的工作原理以及 HMM 在这种情况下的目的。第五部分处理内核中如何表示设备内存。最后，
+最后一节介绍了一个新的迁移助手，它允许利用设备 DMA 引擎。
+
+.. contents:: :local:
+
+使用特定于设备的内存分配器的问题
+================================
+
+具有大量板载内存（几 GB）的设备（如 GPU）历来通过专用驱动程序特定 API 管理其内存。这会
+造成设备驱动程序分配和管理的内存与常规应用程序内存（私有匿名、共享内存或常规文件支持内存）
+之间的隔断。从这里开始，我将把这个方面称为分割的地址空间。我使用共享地址空间来指代相反的情况：
+即，设备可以透明地使用任何应用程序内存区域。
+
+分割的地址空间的发生是因为设备只能访问通过设备特定 API 分配的内存。这意味着从设备的角度来
+看，程序中的所有内存对象并不平等，这使得依赖于广泛的库的大型程序变得复杂。
+
+具体来说，这意味着想要利用像 GPU 这样的设备的代码需要在通用分配的内存（malloc、mmap
+私有、mmap 共享）和通过设备驱动程序 API 分配的内存之间复制对象（这仍然以 mmap 结束，
+但是是设备文件）。
+
+对于平面数据集（数组、网格、图像……），这并不难实现，但对于复杂数据集（列表、树……），
+很难做到正确。复制一个复杂的数据集需要重新映射其每个元素之间的所有指针关系。这很容易出错，
+而且由于数据集和地址的重复，程序更难调试。
+
+分割地址空间也意味着库不能透明地使用它们从核心程序或另一个库中获得的数据，因此每个库可能
+不得不使用设备特定的内存分配器来重复其输入数据集。大型项目会因此受到影响，并因为各种内存
+拷贝而浪费资源。
+
+复制每个库的API以接受每个设备特定分配器分配的内存作为输入或输出，并不是一个可行的选择。
+这将导致库入口点的组合爆炸。
+
+最后，随着高级语言结构（在 C++ 中，当然也在其他语言中）的进步，编译器现在有可能在没有程
+序员干预的情况下利用 GPU 和其他设备。某些编译器识别的模式仅适用于共享地址空间。对所有
+其他模式，使用共享地址空间也更合理。
+
+
+I/O 总线、设备内存特性
+======================
+
+由于一些限制，I/O 总线削弱了共享地址空间。大多数 I/O 总线只允许从设备到主内存的基本
+内存访问；甚至缓存一致性通常是可选的。从 CPU 访问设备内存甚至更加有限。通常情况下，它
+不是缓存一致的。
+
+如果我们只考虑 PCIE 总线，那么设备可以访问主内存（通常通过 IOMMU）并与 CPU 缓存一
+致。但是，它只允许设备对主存储器进行一组有限的原子操作。这在另一个方向上更糟：CPU
+只能访问有限范围的设备内存，而不能对其执行原子操作。因此，从内核的角度来看，设备内存不
+能被视为与常规内存等同。
+
+另一个严重的因素是带宽有限（约 32GBytes/s，PCIE 4.0 和 16 通道）。这比最快的 GPU
+内存 (1 TBytes/s) 慢 33 倍。最后一个限制是延迟。从设备访问主内存的延迟比设备访问自
+己的内存时高一个数量级。
+
+一些平台正在开发新的 I/O 总线或对 PCIE 的添加/修改以解决其中一些限制
+（OpenCAPI、CCIX）。它们主要允许 CPU 和设备之间的双向缓存一致性，并允许架构支持的所
+有原子操作。遗憾的是，并非所有平台都遵循这一趋势，并且一些主要架构没有针对这些问题的硬
+件解决方案。
+
+因此，为了使共享地址空间有意义，我们不仅必须允许设备访问任何内存，而且还必须允许任何内
+存在设备使用时迁移到设备内存（在迁移时阻止 CPU 访问）。
+
+
+共享地址空间和迁移
+==================
+
+HMM 打算提供两个主要功能。第一个是通过复制cpu页表到设备页表中来共享地址空间，因此对
+于进程地址空间中的任何有效主内存地址，相同的地址指向相同的物理内存。
+
+为了实现这一点，HMM 提供了一组帮助程序来填充设备页表，同时跟踪 CPU 页表更新。设备页表
+更新不像 CPU 页表更新那么容易。要更新设备页表，您必须分配一个缓冲区（或使用预先分配的
+缓冲区池）并在其中写入 GPU 特定命令以执行更新（取消映射、缓存失效和刷新等）。这不能通
+过所有设备的通用代码来完成。因此，为什么HMM提供了帮助器，在把硬件的具体细节留给设备驱
+动程序的同时，把一切可以考虑的因素都考虑进去了。
+
+HMM 提供的第二种机制是一种新的 ZONE_DEVICE 内存，它允许为设备内存的每个页面分配一个
+struct page。这些页面很特殊，因为 CPU 无法映射它们。然而，它们允许使用现有的迁移机
+制将主内存迁移到设备内存，从 CPU 的角度来看，一切看起来都像是换出到磁盘的页面。使用
+struct page可以与现有的 mm 机制进行最简单、最干净的集成。再次，HMM 仅提供帮助程序，
+首先为设备内存热插拔新的 ZONE_DEVICE 内存，然后执行迁移。迁移内容和时间的策略决定留
+给设备驱动程序。
+
+请注意，任何 CPU 对设备页面的访问都会触发缺页异常并迁移回主内存。例如，当支持给定CPU
+地址 A 的页面从主内存页面迁移到设备页面时，对地址 A 的任何 CPU 访问都会触发缺页异常
+并启动向主内存的迁移。
+
+凭借这两个特性，HMM 不仅允许设备镜像进程地址空间并保持 CPU 和设备页表同步，而且还通
+过迁移设备正在使用的数据集部分来利用设备内存。
+
+
+地址空间镜像实现和API
+=====================
+
+地址空间镜像的主要目标是允许将一定范围的 CPU 页表复制到一个设备页表中；HMM 有助于
+保持两者同步。想要镜像进程地址空间的设备驱动程序必须从注册 mmu_interval_notifier
+开始::
+
+ int mmu_interval_notifier_insert(struct mmu_interval_notifier *interval_sub,
+				  struct mm_struct *mm, unsigned long start,
+				  unsigned long length,
+				  const struct mmu_interval_notifier_ops *ops);
+
+在 ops->invalidate() 回调期间，设备驱动程序必须对范围执行更新操作（将范围标记为只
+读，或完全取消映射等）。设备必须在驱动程序回调返回之前完成更新。
+
+当设备驱动程序想要填充一个虚拟地址范围时，它可以使用::
+
+  int hmm_range_fault(struct hmm_range *range);
+
+如果请求写访问，它将在丢失或只读条目上触发缺页异常（见下文）。缺页异常使用通用的 mm 缺
+页异常代码路径，就像 CPU 缺页异常一样。
+
+这两个函数都将 CPU 页表条目复制到它们的 pfns 数组参数中。该数组中的每个条目对应于虚拟
+范围中的一个地址。HMM 提供了一组标志来帮助驱动程序识别特殊的 CPU 页表项。
+
+在 sync_cpu_device_pagetables() 回调中锁定是驱动程序必须尊重的最重要的方面，以保
+持事物正确同步。使用模式是::
+
+ int driver_populate_range(...)
+ {
+      struct hmm_range range;
+      ...
+
+      range.notifier = &interval_sub;
+      range.start = ...;
+      range.end = ...;
+      range.hmm_pfns = ...;
+
+      if (!mmget_not_zero(interval_sub->notifier.mm))
+          return -EFAULT;
+
+ again:
+      range.notifier_seq = mmu_interval_read_begin(&interval_sub);
+      mmap_read_lock(mm);
+      ret = hmm_range_fault(&range);
+      if (ret) {
+          mmap_read_unlock(mm);
+          if (ret == -EBUSY)
+                 goto again;
+          return ret;
+      }
+      mmap_read_unlock(mm);
+
+      take_lock(driver->update);
+      if (mmu_interval_read_retry(&ni, range.notifier_seq) {
+          release_lock(driver->update);
+          goto again;
+      }
+
+      /* Use pfns array content to update device page table,
+       * under the update lock */
+
+      release_lock(driver->update);
+      return 0;
+ }
+
+driver->update 锁与驱动程序在其 invalidate() 回调中使用的锁相同。该锁必须在调用
+mmu_interval_read_retry() 之前保持，以避免与并发 CPU 页表更新发生任何竞争。
+
+利用 default_flags 和 pfn_flags_mask
+====================================
+
+hmm_range 结构有 2 个字段，default_flags 和 pfn_flags_mask，它们指定整个范围
+的故障或快照策略，而不必为 pfns 数组中的每个条目设置它们。
+
+例如，如果设备驱动程序需要至少具有读取权限的范围的页面，它会设置::
+
+    range->default_flags = HMM_PFN_REQ_FAULT;
+    range->pfn_flags_mask = 0;
+
+并如上所述调用 hmm_range_fault()。这将填充至少具有读取权限的范围内的所有页面。
+
+现在假设驱动程序想要做同样的事情，除了它想要拥有写权限的范围内的一页。现在驱动程序设
+置::
+
+    range->default_flags = HMM_PFN_REQ_FAULT;
+    range->pfn_flags_mask = HMM_PFN_REQ_WRITE;
+    range->pfns[index_of_write] = HMM_PFN_REQ_WRITE;
+
+有了这个，HMM 将在至少读取（即有效）的所有页面中异常，并且对于地址
+== range->start + (index_of_write << PAGE_SHIFT) 它将异常写入权限，即，如果
+CPU pte 没有设置写权限，那么HMM将调用handle_mm_fault()。
+
+hmm_range_fault 完成后，标志位被设置为页表的当前状态，即 HMM_PFN_VALID | 如果页
+面可写，将设置 HMM_PFN_WRITE。
+
+
+从核心内核的角度表示和管理设备内存
+==================================
+
+尝试了几种不同的设计来支持设备内存。第一个使用特定于设备的数据结构来保存有关迁移内存
+的信息，HMM 将自身挂接到 mm 代码的各个位置，以处理对设备内存支持的地址的任何访问。
+事实证明，这最终复制了 struct page 的大部分字段，并且还需要更新许多内核代码路径才
+能理解这种新的内存类型。
+
+大多数内核代码路径从不尝试访问页面后面的内存，而只关心struct page的内容。正因为如此，
+HMM 切换到直接使用 struct page 用于设备内存，这使得大多数内核代码路径不知道差异。
+我们只需要确保没有人试图从 CPU 端映射这些页面。
+
+移入和移出设备内存
+==================
+
+由于 CPU 无法直接访问设备内存，因此设备驱动程序必须使用硬件 DMA 或设备特定的加载/存
+储指令来迁移数据。migrate_vma_setup()、migrate_vma_pages() 和
+migrate_vma_finalize() 函数旨在使驱动程序更易于编写并集中跨驱动程序的通用代码。
+
+在将页面迁移到设备私有内存之前，需要创建特殊的设备私有 ``struct page`` 。这些将用
+作特殊的“交换”页表条目，以便 CPU 进程在尝试访问已迁移到设备专用内存的页面时会发生异常。
+
+这些可以通过以下方式分配和释放::
+
+    struct resource *res;
+    struct dev_pagemap pagemap;
+
+    res = request_free_mem_region(&iomem_resource, /* number of bytes */,
+                                  "name of driver resource");
+    pagemap.type = MEMORY_DEVICE_PRIVATE;
+    pagemap.range.start = res->start;
+    pagemap.range.end = res->end;
+    pagemap.nr_range = 1;
+    pagemap.ops = &device_devmem_ops;
+    memremap_pages(&pagemap, numa_node_id());
+
+    memunmap_pages(&pagemap);
+    release_mem_region(pagemap.range.start, range_len(&pagemap.range));
+
+还有devm_request_free_mem_region(), devm_memremap_pages(),
+devm_memunmap_pages() 和 devm_release_mem_region() 当资源可以绑定到 ``struct device``.
+
+整体迁移步骤类似于在系统内存中迁移 NUMA 页面(see :ref:`Page migration <page_migration>`) ，
+但这些步骤分为设备驱动程序特定代码和共享公共代码:
+
+1. ``mmap_read_lock()``
+
+   设备驱动程序必须将 ``struct vm_area_struct`` 传递给migrate_vma_setup()，
+   因此需要在迁移期间保留 mmap_read_lock() 或 mmap_write_lock()。
+
+2. ``migrate_vma_setup(struct migrate_vma *args)``
+
+   设备驱动初始化了 ``struct migrate_vma`` 的字段，并将该指针传递给
+   migrate_vma_setup()。``args->flags`` 字段是用来过滤哪些源页面应该被迁移。
+   例如，设置 ``MIGRATE_VMA_SELECT_SYSTEM`` 将只迁移系统内存，设置
+   ``MIGRATE_VMA_SELECT_DEVICE_PRIVATE`` 将只迁移驻留在设备私有内存中的页
+   面。如果后者被设置， ``args->pgmap_owner`` 字段被用来识别驱动所拥有的设备
+   私有页。这就避免了试图迁移驻留在其他设备中的设备私有页。目前，只有匿名的私有VMA
+   范围可以被迁移到系统内存和设备私有内存。
+
+   migrate_vma_setup()所做的第一步是用 ``mmu_notifier_invalidate_range_start()``
+   和 ``mmu_notifier_invalidate_range_end()`` 调用来遍历设备周围的页表，使
+   其他设备的MMU无效，以便在 ``args->src`` 数组中填写要迁移的PFN。
+   ``invalidate_range_start()`` 回调传递给一个``struct mmu_notifier_range`` ，
+   其 ``event`` 字段设置为MMU_NOTIFY_MIGRATE， ``owner`` 字段设置为传递给
+   migrate_vma_setup()的 ``args->pgmap_owner`` 字段。这允许设备驱动跳过无
+   效化回调，只无效化那些实际正在迁移的设备私有MMU映射。这一点将在下一节详细解释。
+
+
+   在遍历页表时，一个 ``pte_none()`` 或 ``is_zero_pfn()`` 条目导致一个有效
+   的  “zero” PFN 存储在 ``args->src`` 阵列中。这让驱动分配设备私有内存并清
+   除它，而不是复制一个零页。到系统内存或设备私有结构页的有效PTE条目将被
+   ``lock_page()``锁定，与LRU隔离（如果系统内存和设备私有页不在LRU上），从进
+   程中取消映射，并插入一个特殊的迁移PTE来代替原来的PTE。 migrate_vma_setup()
+   还清除了 ``args->dst`` 数组。
+
+3. 设备驱动程序分配目标页面并将源页面复制到目标页面。
+
+   驱动程序检查每个 ``src`` 条目以查看该 ``MIGRATE_PFN_MIGRATE`` 位是否已
+   设置并跳过未迁移的条目。设备驱动程序还可以通过不填充页面的 ``dst`` 数组来选
+   择跳过页面迁移。
+
+   然后，驱动程序分配一个设备私有 struct page 或一个系统内存页，用 ``lock_page()``
+   锁定该页，并将 ``dst`` 数组条目填入::
+
+     dst[i] = migrate_pfn(page_to_pfn(dpage));
+
+   现在驱动程序知道这个页面正在被迁移，它可以使设备私有 MMU 映射无效并将设备私有
+   内存复制到系统内存或另一个设备私有页面。由于核心 Linux 内核会处理 CPU 页表失
+   效，因此设备驱动程序只需使其自己的 MMU 映射失效。
+
+   驱动程序可以使用 ``migrate_pfn_to_page(src[i])`` 来获取源设备的
+   ``struct page`` 面，并将源页面复制到目标设备上，如果指针为 ``NULL`` ，意
+   味着源页面没有被填充到系统内存中，则清除目标设备的私有内存。
+
+4. ``migrate_vma_pages()``
+
+   这一步是实际“提交”迁移的地方。
+
+   如果源页是 ``pte_none()`` 或 ``is_zero_pfn()`` 页，这时新分配的页会被插
+   入到CPU的页表中。如果一个CPU线程在同一页面上发生异常，这可能会失败。然而，页
+   表被锁定，只有一个新页会被插入。如果它失去了竞争，设备驱动将看到
+   ``MIGRATE_PFN_MIGRATE`` 位被清除。
+
+   如果源页被锁定、隔离等，源 ``struct page`` 信息现在被复制到目标
+   ``struct page`` ，最终完成CPU端的迁移。
+
+5. 设备驱动为仍在迁移的页面更新设备MMU页表，回滚未迁移的页面。
+
+   如果 ``src`` 条目仍然有 ``MIGRATE_PFN_MIGRATE`` 位被设置，设备驱动可以
+   更新设备MMU，如果 ``MIGRATE_PFN_WRITE`` 位被设置，则设置写启用位。
+
+6. ``migrate_vma_finalize()``
+
+   这一步用新页的页表项替换特殊的迁移页表项，并释放对源和目的 ``struct page``
+   的引用。
+
+7. ``mmap_read_unlock()``
+
+   现在可以释放锁了。
+
+独占访问存储器
+==============
+
+一些设备具有诸如原子PTE位的功能，可以用来实现对系统内存的原子访问。为了支持对一
+个共享的虚拟内存页的原子操作，这样的设备需要对该页的访问是排他的，而不是来自CPU
+的任何用户空间访问。  ``make_device_exclusive_range()`` 函数可以用来使一
+个内存范围不能从用户空间访问。
+
+这将用特殊的交换条目替换给定范围内的所有页的映射。任何试图访问交换条目的行为都会
+导致一个异常，该异常会通过用原始映射替换该条目而得到恢复。驱动程序会被通知映射已
+经被MMU通知器改变，之后它将不再有对该页的独占访问。独占访问被保证持续到驱动程序
+放弃页面锁和页面引用为止，这时页面上的任何CPU异常都可以按所述进行。
+
+内存 cgroup (memcg) 和 rss 统计
+===============================
+
+目前，设备内存被视为 rss 计数器中的任何常规页面（如果设备页面用于匿名，则为匿名，
+如果设备页面用于文件支持页面，则为文件，如果设备页面用于共享内存，则为 shmem）。
+这是为了保持现有应用程序的故意选择，这些应用程序可能在不知情的情况下开始使用设备
+内存，运行不受影响。
+
+一个缺点是 OOM 杀手可能会杀死使用大量设备内存而不是大量常规系统内存的应用程序，
+因此不会释放太多系统内存。在决定以不同方式计算设备内存之前，我们希望收集更多关
+于应用程序和系统在存在设备内存的情况下在内存压力下如何反应的实际经验。
+
+对内存 cgroup 做出了相同的决定。设备内存页面根据相同的内存 cgroup 计算，常规
+页面将被计算在内。这确实简化了进出设备内存的迁移。这也意味着从设备内存迁移回常规
+内存不会失败，因为它会超过内存 cgroup 限制。一旦我们对设备内存的使用方式及其对
+内存资源控制的影响有了更多的了解，我们可能会在后面重新考虑这个选择。
+
+请注意，设备内存永远不能由设备驱动程序或通过 GUP 固定，因此此类内存在进程退出时
+总是被释放的。或者在共享内存或文件支持内存的情况下，当删除最后一个引用时。
diff --git a/Documentation/translations/zh_CN/vm/hugetlbfs_reserv.rst b/Documentation/translations/zh_CN/vm/hugetlbfs_reserv.rst
new file mode 100644
index 000000000000..c6d471ce2131
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/hugetlbfs_reserv.rst
@@ -0,0 +1,436 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/vm/hugetlbfs_reserv.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+==============
+Hugetlbfs 预留
+==============
+
+概述
+====
+
+:ref:`hugetlbpage` 中描述的巨页通常是预先分配给应用程序使用的。如果VMA指
+示要使用巨页，这些巨页会在缺页异常时被实例化到任务的地址空间。如果在缺页异常
+时没有巨页存在，任务就会被发送一个SIGBUS，并经常不高兴地死去。在加入巨页支
+持后不久，人们决定，在mmap()时检测巨页的短缺情况会更好。这个想法是，如果
+没有足够的巨页来覆盖映射，mmap()将失败。这首先是在mmap()时在代码中做一个
+简单的检查，以确定是否有足够的空闲巨页来覆盖映射。就像内核中的大多数东西一
+样，代码随着时间的推移而不断发展。然而，基本的想法是在mmap()时 “预留”
+巨页，以确保巨页可以用于该映射中的缺页异常。下面的描述试图描述在v4.10内核
+中是如何进行巨页预留处理的。
+
+
+读者
+====
+这个描述主要是针对正在修改hugetlbfs代码的内核开发者。
+
+
+数据结构
+========
+
+resv_huge_pages
+	这是一个全局的（per-hstate）预留的巨页的计数。预留的巨页只对预留它们的任
+	务可用。因此，一般可用的巨页的数量被计算为(``free_huge_pages - resv_huge_pages``)。
+Reserve Map
+	预留映射由以下结构体描述::
+
+		struct resv_map {
+			struct kref refs;
+			spinlock_t lock;
+			struct list_head regions;
+			long adds_in_progress;
+			struct list_head region_cache;
+			long region_cache_count;
+		};
+
+	系统中每个巨页映射都有一个预留映射。resv_map中的regions列表描述了映射中的
+	区域。一个区域被描述为::
+
+		struct file_region {
+			struct list_head link;
+			long from;
+			long to;
+		};
+
+	file_region结构体的 ‘from’ 和 ‘to’ 字段是进入映射的巨页索引。根据映射的类型，在
+	reserv_map 中的一个区域可能表示该范围存在预留，或预留不存在。
+Flags for MAP_PRIVATE Reservations
+	这些被存储在预留的映射指针的底部。
+
+	``#define HPAGE_RESV_OWNER    (1UL << 0)``
+		表示该任务是与该映射相关的预留的所有者。
+	``#define HPAGE_RESV_UNMAPPED (1UL << 1)``
+		表示最初映射此范围（并创建储备）的任务由于COW失败而从该任务（子任务）中取消映
+		射了一个页面。
+Page Flags
+	PagePrivate页面标志是用来指示在释放巨页时必须恢复巨页的预留。更多细节将在
+	“释放巨页” 一节中讨论。
+
+
+预留映射位置（私有或共享）
+==========================
+
+一个巨页映射或段要么是私有的，要么是共享的。如果是私有的，它通常只对一个地址空间
+（任务）可用。如果是共享的，它可以被映射到多个地址空间（任务）。对于这两种类型的映射，
+预留映射的位置和语义是明显不同的。位置的差异是：
+
+- 对于私有映射，预留映射挂在VMA结构体上。具体来说，就是vma->vm_private_data。这个保
+  留映射是在创建映射（mmap(MAP_PRIVATE)）时创建的。
+- 对于共享映射，预留映射挂在inode上。具体来说，就是inode->i_mapping->private_data。
+  由于共享映射总是由hugetlbfs文件系统中的文件支持，hugetlbfs代码确保每个节点包含一个预
+  留映射。因此，预留映射在创建节点时被分配。
+
+
+创建预留
+========
+当创建一个巨大的有页面支持的共享内存段（shmget(SHM_HUGETLB)）或通过mmap(MAP_HUGETLB)
+创建一个映射时，就会创建预留。这些操作会导致对函数hugetlb_reserve_pages()的调用::
+
+	int hugetlb_reserve_pages(struct inode *inode,
+				  long from, long to,
+				  struct vm_area_struct *vma,
+				  vm_flags_t vm_flags)
+
+hugetlb_reserve_pages()做的第一件事是检查在调用shmget()或mmap()时是否指定了NORESERVE
+标志。如果指定了NORESERVE，那么这个函数立即返回，因为不需要预留。
+
+参数'from'和'to'是映射或基础文件的巨页索引。对于shmget()，'from'总是0，'to'对应于段/映射
+的长度。对于mmap()，offset参数可以用来指定进入底层文件的偏移量。在这种情况下，'from'和'to'
+参数已经被这个偏移量所调整。
+
+PRIVATE和SHARED映射之间的一个很大的区别是预留在预留映射中的表示方式。
+
+- 对于共享映射，预留映射中的条目表示对应页面的预留存在或曾经存在。当预留被消耗时，预留映射不被
+  修改。
+- 对于私有映射，预留映射中没有条目表示相应页面存在预留。随着预留被消耗，条目被添加到预留映射中。
+  因此，预留映射也可用于确定哪些预留已被消耗。
+
+对于私有映射，hugetlb_reserve_pages()创建预留映射并将其挂在VMA结构体上。此外，
+HPAGE_RESV_OWNER标志被设置，以表明该VMA拥有预留。
+
+预留映射被查阅以确定当前映射/段需要多少巨页预留。对于私有映射，这始终是一个值（to - from）。
+然而，对于共享映射来说，一些预留可能已经存在于(to - from)的范围内。关于如何实现这一点的细节，
+请参见 :ref:`预留映射的修改 <resv_map_modifications>` 一节。
+
+该映射可能与一个子池（subpool）相关联。如果是这样，将查询子池以确保有足够的空间用于映射。子池
+有可能已经预留了可用于映射的预留空间。更多细节请参见 :ref: `子池预留 <sub_pool_resv>`
+一节。
+
+在咨询了预留映射和子池之后，就知道了需要的新预留数量。hugetlb_acct_memory()函数被调用以检查
+并获取所要求的预留数量。hugetlb_acct_memory()调用到可能分配和调整剩余页数的函数。然而，在这
+些函数中，代码只是检查以确保有足够的空闲的巨页来容纳预留。如果有的话，全局预留计数resv_huge_pages
+会被调整，如下所示::
+
+	if (resv_needed <= (resv_huge_pages - free_huge_pages))
+		resv_huge_pages += resv_needed;
+
+注意，在检查和调整这些计数器时，全局锁hugetlb_lock会被预留。
+
+如果有足够的空闲的巨页，并且全局计数resv_huge_pages被调整，那么与映射相关的预留映射被修改以
+反映预留。在共享映射的情况下，将存在一个file_region，包括'from'-'to'范围。对于私有映射，
+不对预留映射进行修改，因为没有条目表示存在预留。
+
+如果hugetlb_reserve_pages()成功，全局预留数和与映射相关的预留映射将根据需要被修改，以确保
+在'from'-'to'范围内存在预留。
+
+消耗预留/分配一个巨页
+===========================
+
+当与预留相关的巨页在相应的映射中被分配和实例化时，预留就被消耗了。该分配是在函数alloc_huge_page()
+中进行的::
+
+	struct page *alloc_huge_page(struct vm_area_struct *vma,
+				     unsigned long addr, int avoid_reserve)
+
+alloc_huge_page被传递给一个VMA指针和一个虚拟地址，因此它可以查阅预留映射以确定是否存在预留。
+此外，alloc_huge_page需要一个参数avoid_reserve，该参数表示即使看起来已经为指定的地址预留了
+预留，也不应该使用预留。avoid_reserve参数最常被用于写时拷贝和页面迁移的情况下，即现有页面的额
+外拷贝被分配。
+
+
+调用辅助函数vma_needs_reservation()来确定是否存在对映射(vma)中地址的预留。关于这个函数的详
+细内容，请参见 :ref:`预留映射帮助函数 <resv_map_helpers>` 一节。从
+vma_needs_reservation()返回的值通常为0或1。如果该地址存在预留，则为0，如果不存在预留，则为1。
+如果不存在预留，并且有一个与映射相关联的子池，则查询子池以确定它是否包含预留。如果子池包含预留，
+则可将其中一个用于该分配。然而，在任何情况下，avoid_reserve参数都会优先考虑为分配使用预留。在
+确定预留是否存在并可用于分配后，调用dequeue_huge_page_vma()函数。这个函数需要两个与预留有关
+的参数：
+
+- avoid_reserve，这是传递给alloc_huge_page()的同一个值/参数。
+- chg，尽管这个参数的类型是long，但只有0或1的值被传递给dequeue_huge_page_vma。如果该值为0，
+  则表明存在预留（关于可能的问题，请参见 “预留和内存策略” 一节）。如果值
+  为1，则表示不存在预留，如果可能的话，必须从全局空闲池中取出该页。
+
+与VMA的内存策略相关的空闲列表被搜索到一个空闲页。如果找到了一个页面，当该页面从空闲列表中移除时，
+free_huge_pages的值被递减。如果有一个与该页相关的预留，将进行以下调整::
+
+	SetPagePrivate(page);	/* 表示分配这个页面消耗了一个预留，
+				 * 如果遇到错误，以至于必须释放这个页面，预留将被
+				 * 恢复。 */
+	resv_huge_pages--;	/* 减少全局预留计数 */
+
+注意，如果找不到满足VMA内存策略的巨页，将尝试使用伙伴分配器分配一个。这就带来了超出预留范围
+的剩余巨页和超额分配的问题。即使分配了一个多余的页面，也会进行与上面一样的基于预留的调整:
+SetPagePrivate(page) 和 resv_huge_pages--.
+
+在获得一个新的巨页后，(page)->private被设置为与该页面相关的子池的值，如果它存在的话。当页
+面被释放时，这将被用于子池的计数。
+
+然后调用函数vma_commit_reservation()，根据预留的消耗情况调整预留映射。一般来说，这涉及
+到确保页面在区域映射的file_region结构体中被表示。对于预留存在的共享映射，预留映射中的条目
+已经存在，所以不做任何改变。然而，如果共享映射中没有预留，或者这是一个私有映射，则必须创建一
+个新的条目。
+
+注意，如果找不到满足VMA内存策略的巨页，将尝试使用伙伴分配器分配一个。这就带来了超出预留范围
+的剩余巨页和过度分配的问题。即使分配了一个多余的页面，也会进行与上面一样的基于预留的调整。
+SetPagePrivate(page)和resv_huge_pages-。
+
+在获得一个新的巨页后，(page)->private被设置为与该页面相关的子池的值，如果它存在的话。当页
+面被释放时，这将被用于子池的计数。
+
+然后调用函数vma_commit_reservation()，根据预留的消耗情况调整预留映射。一般来说，这涉及
+到确保页面在区域映射的file_region结构体中被表示。对于预留存在的共享映射，预留映射中的条目
+已经存在，所以不做任何改变。然而，如果共享映射中没有预留，或者这是一个私有映射，则必须创建
+一个新的条目。
+
+在alloc_huge_page()开始调用vma_needs_reservation()和页面分配后调用
+vma_commit_reservation()之间，预留映射有可能被改变。如果hugetlb_reserve_pages在共
+享映射中为同一页面被调用，这将是可能的。在这种情况下，预留计数和子池空闲页计数会有一个偏差。
+这种罕见的情况可以通过比较vma_needs_reservation和vma_commit_reservation的返回值来
+识别。如果检测到这种竞争，子池和全局预留计数将被调整以进行补偿。关于这些函数的更多信息，请
+参见 :ref:`预留映射帮助函数 <resv_map_helpers>` 一节。
+
+
+实例化巨页
+==========
+
+在巨页分配之后，页面通常被添加到分配任务的页表中。在此之前，共享映射中的页面被添加到页面缓
+存中，私有映射中的页面被添加到匿名反向映射中。在这两种情况下，PagePrivate标志被清除。因此，
+当一个已经实例化的巨页被释放时，不会对全局预留计数（resv_huge_pages）进行调整。
+
+
+释放巨页
+========
+
+巨页释放是由函数free_huge_page()执行的。这个函数是hugetlbfs复合页的析构器。因此，它只传
+递一个指向页面结构体的指针。当一个巨页被释放时，可能需要进行预留计算。如果该页与包含保
+留的子池相关联，或者该页在错误路径上被释放，必须恢复全局预留计数，就会出现这种情况。
+
+page->private字段指向与该页相关的任何子池。如果PagePrivate标志被设置，它表明全局预留计数
+应该被调整（关于如何设置这些标志的信息，请参见
+:ref: `消耗预留/分配一个巨页 <consume_resv>` ）。
+
+
+该函数首先调用hugepage_subpool_put_pages()来处理该页。如果这个函数返回一个0的值（不等于
+传递的1的值），它表明预留与子池相关联，这个新释放的页面必须被用来保持子池预留的数量超过最小值。
+因此，在这种情况下，全局resv_huge_pages计数器被递增。
+
+如果页面中设置了PagePrivate标志，那么全局resv_huge_pages计数器将永远被递增。
+
+子池预留
+========
+
+有一个结构体hstate与每个巨页尺寸相关联。hstate跟踪所有指定大小的巨页。一个子池代表一
+个hstate中的页面子集，它与一个已挂载的hugetlbfs文件系统相关
+
+当一个hugetlbfs文件系统被挂载时，可以指定min_size选项，它表示文件系统所需的最小的巨页数量。
+如果指定了这个选项，与min_size相对应的巨页的数量将被预留给文件系统使用。这个数字在结构体
+hugepage_subpool的min_hpages字段中被跟踪。在挂载时，hugetlb_acct_memory(min_hpages)
+被调用以预留指定数量的巨页。如果它们不能被预留，挂载就会失败。
+
+当从子池中获取或释放页面时，会调用hugepage_subpool_get/put_pages()函数。
+hugepage_subpool_get/put_pages被传递给巨页数量，以此来调整子池的 “已用页面” 计数
+（get为下降，put为上升）。通常情况下，如果子池中没有足够的页面，它们会返回与传递的相同的值或
+一个错误。
+
+然而，如果预留与子池相关联，可能会返回一个小于传递值的返回值。这个返回值表示必须进行的额外全局
+池调整的数量。例如，假设一个子池包含3个预留的巨页，有人要求5个。与子池相关的3个预留页可以用来
+满足部分请求。但是，必须从全局池中获得2个页面。为了向调用者转达这一信息，将返回值2。然后，调用
+者要负责从全局池中获取另外两个页面。
+
+
+COW和预留
+==========
+
+由于共享映射都指向并使用相同的底层页面，COW最大的预留问题是私有映射。在这种情况下，两个任务可
+以指向同一个先前分配的页面。一个任务试图写到该页，所以必须分配一个新的页，以便每个任务都指向它
+自己的页。
+
+当该页最初被分配时，该页的预留被消耗了。当由于COW而试图分配一个新的页面时，有可能没有空闲的巨
+页，分配会失败。
+
+当最初创建私有映射时，通过设置所有者的预留映射指针中的HPAGE_RESV_OWNER位来标记映射的所有者。
+由于所有者创建了映射，所有者拥有与映射相关的所有预留。因此，当一个写异常发生并且没有可用的页面
+时，对预留的所有者和非所有者采取不同的行动。
+
+在发生异常的任务不是所有者的情况下，异常将失败，该任务通常会收到一个SIGBUS。
+
+如果所有者是发生异常的任务，我们希望它能够成功，因为它拥有原始的预留。为了达到这个目的，该页被
+从非所有者任务中解映射出来。这样一来，唯一的引用就是来自拥有者的任务。此外，HPAGE_RESV_UNMAPPED
+位被设置在非拥有任务的预留映射指针中。如果非拥有者任务后来在一个不存在的页面上发生异常，它可能
+会收到一个SIGBUS。但是，映射/预留的原始拥有者的行为将与预期一致。
+
+预留映射的修改
+==============
+
+以下低级函数用于对预留映射进行修改。通常情况下，这些函数不会被直接调用。而是调用一个预留映射辅
+助函数，该函数调用这些低级函数中的一个。这些低级函数在源代码（mm/hugetlb.c）中得到了相当好的
+记录。这些函数是::
+
+	long region_chg(struct resv_map *resv, long f, long t);
+	long region_add(struct resv_map *resv, long f, long t);
+	void region_abort(struct resv_map *resv, long f, long t);
+	long region_count(struct resv_map *resv, long f, long t);
+
+在预留映射上的操作通常涉及两个操作:
+
+1) region_chg()被调用来检查预留映射，并确定在指定的范围[f, t]内有多少页目前没有被代表。
+
+   调用代码执行全局检查和分配，以确定是否有足够的巨页使操作成功。
+
+2)
+  a) 如果操作能够成功，regi_add()将被调用，以实际修改先前传递给regi_chg()的相同范围
+     [f, t]的预留映射。
+  b) 如果操作不能成功，region_abort被调用，在相同的范围[f, t]内中止操作。
+
+注意，这是一个两步的过程， region_add()和 region_abort()在事先调用 region_chg()后保证
+成功。 region_chg()负责预先分配任何必要的数据结构以确保后续操作（特别是 region_add()）的
+成功。
+
+如上所述，region_chg()确定该范围内当前没有在映射中表示的页面的数量。region_add()返回添加
+到映射中的范围内的页数。在大多数情况下， region_add() 的返回值与 region_chg() 的返回值相
+同。然而，在共享映射的情况下，有可能在调用 region_chg() 和 region_add() 之间对预留映射进
+行更改。在这种情况下，regi_add()的返回值将与regi_chg()的返回值不符。在这种情况下，全局计数
+和子池计数很可能是不正确的，需要调整。检查这种情况并进行适当的调整是调用者的责任。
+
+函数region_del()被调用以从预留映射中移除区域。
+它通常在以下情况下被调用:
+
+- 当hugetlbfs文件系统中的一个文件被删除时，该节点将被释放，预留映射也被释放。在释放预留映射
+  之前，所有单独的file_region结构体必须被释放。在这种情况下，region_del的范围是[0, LONG_MAX]。
+- 当一个hugetlbfs文件正在被截断时。在这种情况下，所有在新文件大小之后分配的页面必须被释放。
+  此外，预留映射中任何超过新文件大小的file_region条目必须被删除。在这种情况下，region_del
+  的范围是[new_end_of_file, LONG_MAX]。
+- 当在一个hugetlbfs文件中打洞时。在这种情况下，巨页被一次次从文件的中间移除。当这些页被移除
+  时，region_del()被调用以从预留映射中移除相应的条目。在这种情况下，region_del被传递的范
+  围是[page_idx, page_idx + 1]。
+
+在任何情况下，region_del()都会返回从预留映射中删除的页面数量。在非常罕见的情况下，region_del()
+会失败。这只能发生在打洞的情况下，即它必须分割一个现有的file_region条目，而不能分配一个新的
+结构体。在这种错误情况下，region_del()将返回-ENOMEM。这里的问题是，预留映射将显示对该页有
+预留。然而，子池和全局预留计数将不反映该预留。为了处理这种情况，调用函数hugetlb_fix_reserve_counts()
+来调整计数器，使其与不能被删除的预留映射条目相对应。
+
+region_count()在解除私有巨页映射时被调用。在私有映射中，预留映射中没有条目表明存在一个预留。
+因此，通过计算预留映射中的条目数，我们知道有多少预留被消耗了，有多少预留是未完成的
+（Outstanding = (end - start) - region_count（resv, start, end））。由于映射正在消
+失，子池和全局预留计数被未完成的预留数量所减去。
+
+预留映射帮助函数
+================
+
+有几个辅助函数可以查询和修改预留映射。这些函数只对特定的巨页的预留感兴趣，所以它们只是传入一个
+地址而不是一个范围。此外，它们还传入相关的VMA。从VMA中，可以确定映射的类型（私有或共享）和预留
+映射的位置（inode或VMA）。这些函数只是调用 “预留映射的修改” 一节中描述的基础函数。然而，
+它们确实考虑到了私有和共享映射的预留映射条目的 “相反” 含义，并向调用者隐藏了这个细节::
+
+	long vma_needs_reservation(struct hstate *h,
+				   struct vm_area_struct *vma,
+				   unsigned long addr)
+
+该函数为指定的页面调用 region_chg()。如果不存在预留，则返回1。如果存在预留，则返回0::
+
+	long vma_commit_reservation(struct hstate *h,
+				    struct vm_area_struct *vma,
+				    unsigned long addr)
+
+这将调用 region_add()，用于指定的页面。与region_chg和region_add的情况一样，该函数应在
+先前调用的vma_needs_reservation后调用。它将为该页添加一个预留条目。如果预留被添加，它将
+返回1，如果没有则返回0。返回值应与之前调用vma_needs_reservation的返回值进行比较。如果出
+现意外的差异，说明在两次调用之间修改了预留映射::
+
+	void vma_end_reservation(struct hstate *h,
+				 struct vm_area_struct *vma,
+				 unsigned long addr)
+
+这将调用指定页面的 region_abort()。与region_chg和region_abort的情况一样，该函数应在
+先前调用的vma_needs_reservation后被调用。它将中止/结束正在进行的预留添加操作::
+
+	long vma_add_reservation(struct hstate *h,
+				 struct vm_area_struct *vma,
+				 unsigned long addr)
+
+这是一个特殊的包装函数，有助于在错误路径上清理预留。它只从repare_reserve_on_error()函数
+中调用。该函数与vma_needs_reservation一起使用，试图将一个预留添加到预留映射中。它考虑到
+了私有和共享映射的不同预留映射语义。因此，region_add被调用用于共享映射（因为映射中的条目表
+示预留），而region_del被调用用于私有映射（因为映射中没有条目表示预留）。关于在错误路径上需
+要做什么的更多信息，请参见  “错误路径中的预留清理”  。
+
+
+错误路径中的预留清理
+====================
+
+正如在:ref:`预留映射帮助函数<resv_map_helpers>` 一节中提到的，预留的修改分两步进行。首
+先，在分配页面之前调用vma_needs_reservation。如果分配成功，则调用vma_commit_reservation。
+如果不是，则调用vma_end_reservation。全局和子池的预留计数根据操作的成功或失败进行调整，
+一切都很好。
+
+此外，在一个巨页被实例化后，PagePrivate标志被清空，这样，当页面最终被释放时，计数是
+正确的。
+
+然而，有几种情况是，在一个巨页被分配后，但在它被实例化之前，就遇到了错误。在这种情况下，
+页面分配已经消耗了预留，并进行了适当的子池、预留映射和全局计数调整。如果页面在这个时候被释放
+（在实例化和清除PagePrivate之前），那么free_huge_page将增加全局预留计数。然而，预留映射
+显示报留被消耗了。这种不一致的状态将导致预留的巨页的 “泄漏” 。全局预留计数将比它原本的要高，
+并阻止分配一个预先分配的页面。
+
+函数 restore_reserve_on_error() 试图处理这种情况。它有相当完善的文档。这个函数的目的
+是将预留映射恢复到页面分配前的状态。通过这种方式，预留映射的状态将与页面释放后的全局预留计
+数相对应。
+
+函数restore_reserve_on_error本身在试图恢复预留映射条目时可能会遇到错误。在这种情况下，
+它将简单地清除该页的PagePrivate标志。这样一来，当页面被释放时，全局预留计数将不会被递增。
+然而，预留映射将继续看起来像预留被消耗了一样。一个页面仍然可以被分配到该地址，但它不会像最
+初设想的那样使用一个预留页。
+
+有一些代码（最明显的是userfaultfd）不能调用restore_reserve_on_error。在这种情况下，
+它简单地修改了PagePrivate，以便在释放巨页时不会泄露预留。
+
+
+预留和内存策略
+==============
+当git第一次被用来管理Linux代码时，每个节点的巨页列表就存在于hstate结构中。预留的概念是
+在一段时间后加入的。当预留被添加时，没有尝试将内存策略考虑在内。虽然cpusets与内存策略不
+完全相同，但hugetlb_acct_memory中的这个注释总结了预留和cpusets/内存策略之间的相互作
+用::
+
+
+	/*
+	 * 当cpuset被配置时，它打破了严格的hugetlb页面预留，因为计数是在一个全局变量上完
+	 * 成的。在有cpuset的情况下，这样的预留完全是垃圾，因为预留没有根据当前cpuset的
+	 * 页面可用性来检查。在任务所在的cpuset中缺乏空闲的htlb页面时，应用程序仍然有可能
+	 * 被内核OOM'ed。试图用cpuset来执行严格的计数几乎是不可能的（或者说太难看了），因
+	 * 为cpuset太不稳定了，任务或内存节点可以在cpuset之间动态移动。与cpuset共享
+	 * hugetlb映射的语义变化是不可取的。然而，为了预留一些语义，我们退回到检查当前空闲
+	 * 页的可用性，作为一种最好的尝试，希望能将cpuset改变语义的影响降到最低。
+	 */
+
+添加巨页预留是为了防止在缺页异常时出现意外的页面分配失败（OOM）。然而，如果一个应用
+程序使用cpusets或内存策略，就不能保证在所需的节点上有巨页可用。即使有足够数量的全局
+预留，也是如此。
+
+Hugetlbfs回归测试
+=================
+
+最完整的hugetlb测试集在libhugetlbfs仓库。如果你修改了任何hugetlb相关的代码，请使用
+libhugetlbfs测试套件来检查回归情况。此外，如果你添加了任何新的hugetlb功能，请在
+libhugetlbfs中添加适当的测试。
+
+--
+Mike Kravetz，2017年4月7日
diff --git a/Documentation/translations/zh_CN/vm/hwpoison.rst b/Documentation/translations/zh_CN/vm/hwpoison.rst
new file mode 100644
index 000000000000..c6e1e7bdb05b
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/hwpoison.rst
@@ -0,0 +1,166 @@
+
+:Original: Documentation/vm/hwpoison.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+
+========
+hwpoison
+========
+
+什么是hwpoison?
+===============
+
+
+即将推出的英特尔CPU支持从一些内存错误中恢复（ ``MCA恢复`` ）。这需要操作系统宣布
+一个页面"poisoned"，杀死与之相关的进程，并避免在未来使用它。
+
+这个补丁包在虚拟机中实现了必要的(编程)框架。
+
+引用概述中的评论::
+
+	高级机器的检查与处理。处理方法是损坏的页面被硬件报告，通常是由于2位ECC内
+	存或高速缓存故障。
+
+	这主要是针对在后台检测到的损坏的页面。当当前的CPU试图访问它时，当前运行的进程
+	可以直接被杀死。因为还没有访问损坏的页面, 如果错误由于某种原因不能被处理，就可
+	以安全地忽略它. 而不是用另外一个机器检查去处理它。
+
+	处理不同状态的页面缓存页。这里棘手的部分是，相对于其他虚拟内存用户， 我们可以异
+	步访问任何页面。因为内存故障可能随时随地发生，可能违反了他们的一些假设。这就是
+	为什么这段代码必须非常小心。一般来说，它试图使用正常的锁规则，如获得标准锁，即使
+	这意味着错误处理可能需要很长的时间。
+
+	这里的一些操作有点低效，并且具有非线性的算法复杂性，因为数据结构没有针对这种情
+	况进行优化。特别是从vma到进程的映射就是这种情况。由于这种情况大概率是罕见的，所
+	以我们希望我们可以摆脱这种情况。
+
+该代码由mm/memory-failure.c中的高级处理程序、一个新的页面poison位和虚拟机中的
+各种检查组成，用来处理poison的页面。
+
+现在主要目标是KVM客户机，但它适用于所有类型的应用程序。支持KVM需要最近的qemu-kvm
+版本。
+
+对于KVM的使用，需要一个新的信号类型，这样KVM就可以用适当的地址将机器检查注入到客户
+机中。这在理论上也允许其他应用程序处理内存故障。我们的期望是，所有的应用程序都不要这
+样做，但一些非常专业的应用程序可能会这样做。
+
+故障恢复模式
+============
+
+有两种（实际上是三种）模式的内存故障恢复可以在。
+
+vm.memory_failure_recovery sysctl 置零:
+	所有的内存故障都会导致panic。请不要尝试恢复。
+
+早期处理
+	(可以在全局和每个进程中控制) 一旦检测到错误，立即向应用程序发送SIGBUS这允许
+	应用程序以温和的方式处理内存错误（例如，放弃受影响的对象） 这是KVM qemu使用的
+	模式。
+
+推迟处理
+	当应用程序运行到损坏的页面时，发送SIGBUS。这对不知道内存错误的应用程序来说是
+	最好的，默认情况下注意一些页面总是被当作late kill处理。
+
+用户控制
+========
+
+vm.memory_failure_recovery
+	参阅 sysctl.txt
+
+vm.memory_failure_early_kill
+	全局启用early kill
+
+PR_MCE_KILL
+	设置early/late kill mode/revert 到系统默认值。
+
+	arg1: PR_MCE_KILL_CLEAR:
+		恢复到系统默认值
+	arg1: PR_MCE_KILL_SET:
+		arg2定义了线程特定模式
+
+		PR_MCE_KILL_EARLY:
+			Early kill
+		PR_MCE_KILL_LATE:
+			Late kill
+		PR_MCE_KILL_DEFAULT
+			使用系统全局默认值
+
+	注意，如果你想有一个专门的线程代表进程处理SIGBUS(BUS_MCEERR_AO)，你应该在
+	指定线程上调用prctl(PR_MCE_KILL_EARLY)。否则，SIGBUS将被发送到主线程。
+
+PR_MCE_KILL_GET
+	返回当前模式
+
+测试
+====
+
+* madvise(MADV_HWPOISON, ....) (as root) - 在测试过程中Poison一个页面
+
+* 通过debugfs ``/sys/kernel/debug/hwpoison/`` hwpoison-inject模块
+
+  corrupt-pfn
+	在PFN处注入hwpoison故障，并echoed到这个文件。这做了一些早期过滤，以避
+	免在测试套件中损坏非预期页面。
+  unpoison-pfn
+	在PFN的Software-unpoison页面对应到这个文件。这样，一个页面可以再次被
+	复用。这只对Linux注入的故障起作用，对真正的内存故障不起作用。
+
+  注意这些注入接口并不稳定，可能会在不同的内核版本中发生变化
+
+  corrupt-filter-dev-major, corrupt-filter-dev-minor
+	只处理与块设备major/minor定义的文件系统相关的页面的内存故障。-1U是通
+	配符值。这应该只用于人工注入的测试。
+
+  corrupt-filter-memcg
+	限制注入到memgroup拥有的页面。由memcg的inode号指定。
+
+	Example::
+
+		mkdir /sys/fs/cgroup/mem/hwpoison
+
+	        usemem -m 100 -s 1000 &
+		echo `jobs -p` > /sys/fs/cgroup/mem/hwpoison/tasks
+
+		memcg_ino=$(ls -id /sys/fs/cgroup/mem/hwpoison | cut -f1 -d' ')
+		echo $memcg_ino > /debug/hwpoison/corrupt-filter-memcg
+
+		page-types -p `pidof init`   --hwpoison  # shall do nothing
+		page-types -p `pidof usemem` --hwpoison  # poison its pages
+
+  corrupt-filter-flags-mask, corrupt-filter-flags-value
+	当指定时，只有在((page_flags & mask) == value)的情况下才会poison页面。
+	这允许对许多种类的页面进行压力测试。page_flags与/proc/kpageflags中的相
+	同。这些标志位在include/linux/kernel-page-flags.h中定义，并在
+	Documentation/admin-guide/mm/pagemap.rst中记录。
+
+* 架构特定的MCE注入器
+
+  x86 有 mce-inject, mce-test
+
+  在mce-test中的一些便携式hwpoison测试程序，见下文。
+
+引用
+====
+
+http://halobates.de/mce-lc09-2.pdf
+	09年LinuxCon的概述演讲
+
+git://git.kernel.org/pub/scm/utils/cpu/mce/mce-test.git
+	测试套件（在tsrc中的hwpoison特定可移植测试）。
+
+git://git.kernel.org/pub/scm/utils/cpu/mce/mce-inject.git
+	x86特定的注入器
+
+
+限制
+====
+- 不是所有的页面类型都被支持，而且永远不会。大多数内核内部对象不能被恢
+  复，目前只有LRU页。
+
+---
+Andi Kleen, 2009年10月
diff --git a/Documentation/translations/zh_CN/vm/index.rst b/Documentation/translations/zh_CN/vm/index.rst
index a1d2f0356cc1..a1c6d529b6ff 100644
--- a/Documentation/translations/zh_CN/vm/index.rst
+++ b/Documentation/translations/zh_CN/vm/index.rst
@@ -27,27 +27,28 @@ TODO:待引用文档集被翻译完毕后请及时修改此处）
    free_page_reporting
    highmem
    ksm
+   frontswap
+   hmm
+   hwpoison
+   hugetlbfs_reserv
+   memory-model
+   mmu_notifier
+   numa
+   overcommit-accounting
+   page_frags
+   page_owner
+   page_table_check
+   remap_file_pages
+   split_page_table_lock
+   z3fold
+   zsmalloc
 
 TODOLIST:
 * arch_pgtable_helpers
 * free_page_reporting
-* frontswap
-* hmm
-* hwpoison
 * hugetlbfs_reserv
-* memory-model
-* mmu_notifier
-* numa
-* overcommit-accounting
 * page_migration
-* page_frags
-* page_owner
-* page_table_check
-* remap_file_pages
 * slub
-* split_page_table_lock
 * transhuge
 * unevictable-lru
 * vmalloced-kernel-stacks
-* z3fold
-* zsmalloc
diff --git a/Documentation/translations/zh_CN/vm/memory-model.rst b/Documentation/translations/zh_CN/vm/memory-model.rst
new file mode 100644
index 000000000000..013e30c88d72
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/memory-model.rst
@@ -0,0 +1,135 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+:Original: Documentation/vm/memory-model.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+
+============
+物理内存模型
+============
+
+系统中的物理内存可以用不同的方式进行寻址。最简单的情况是，物理内存从地址0开
+始，跨越一个连续的范围，直到最大的地址。然而，这个范围可能包含CPU无法访问的
+小孔隙。那么，在完全不同的地址可能有几个连续的范围。而且，别忘了NUMA，即不
+同的内存库连接到不同的CPU。
+
+Linux使用两种内存模型中的一种对这种多样性进行抽象。FLATMEM和SPARSEM。每
+个架构都定义了它所支持的内存模型，默认的内存模型是什么，以及是否有可能手动
+覆盖该默认值。
+
+所有的内存模型都使用排列在一个或多个数组中的 `struct page` 来跟踪物理页
+帧的状态。
+
+无论选择哪种内存模型，物理页框号（PFN）和相应的 `struct page` 之间都存
+在一对一的映射关系。
+
+每个内存模型都定义了 :c:func:`pfn_to_page` 和 :c:func:`page_to_pfn`
+帮助函数，允许从PFN到 `struct page` 的转换，反之亦然。
+
+FLATMEM
+=======
+
+最简单的内存模型是FLATMEM。这个模型适用于非NUMA系统的连续或大部分连续的
+物理内存。
+
+在FLATMEM内存模型中，有一个全局的 `mem_map` 数组来映射整个物理内存。对
+于大多数架构，孔隙在 `mem_map` 数组中都有条目。与孔洞相对应的 `struct page`
+对象从未被完全初始化。
+
+为了分配 `mem_map` 数组，架构特定的设置代码应该调用free_area_init()函数。
+然而，在调用memblock_free_all()函数之前，映射数组是不能使用的，该函数
+将所有的内存交给页分配器。
+
+一个架构可能会释放 `mem_map` 数组中不包括实际物理页的部分。在这种情况下，特
+定架构的 :c:func:`pfn_valid` 实现应该考虑到 `mem_map` 中的孔隙。
+
+使用FLATMEM，PFN和 `struct page` 之间的转换是直接的。 `PFN - ARCH_PFN_OFFSET`
+是 `mem_map` 数组的一个索引。
+
+`ARCH_PFN_OFFSET` 定义了物理内存起始地址不同于0的系统的第一个页框号。
+
+SPARSEMEM
+=========
+
+SPARSEMEM是Linux中最通用的内存模型，它是唯一支持若干高级功能的内存模型，
+如物理内存的热插拔、非易失性内存设备的替代内存图和较大系统的内存图的延迟
+初始化。
+
+SPARSEMEM模型将物理内存显示为一个部分的集合。一个区段用mem_section结构
+体表示，它包含 `section_mem_map` ，从逻辑上讲，它是一个指向 `struct page`
+阵列的指针。然而，它被存储在一些其他的magic中，以帮助分区管理。区段的大小
+和最大区段数是使用 `SECTION_SIZE_BITS` 和 `MAX_PHYSMEM_BITS` 常量
+来指定的，这两个常量是由每个支持SPARSEMEM的架构定义的。 `MAX_PHYSMEM_BITS`
+是一个架构所支持的物理地址的实际宽度，而 `SECTION_SIZE_BITS` 是一个任
+意的值。
+
+最大的段数表示为 `NR_MEM_SECTIONS` ，定义为
+
+.. math::
+
+   NR\_MEM\_SECTIONS = 2 ^ {(MAX\_PHYSMEM\_BITS - SECTION\_SIZE\_BITS)}
+
+`mem_section` 对象被安排在一个叫做 `mem_sections` 的二维数组中。这个数组的
+大小和位置取决于 `CONFIG_SPARSEM_EXTREME` 和可能的最大段数:
+
+* 当 `CONFIG_SPARSEMEM_EXTREME` 被禁用时， `mem_sections` 数组是静态的，有
+  `NR_MEM_SECTIONS` 行。每一行持有一个 `mem_section` 对象。
+* 当 `CONFIG_SPARSEMEM_EXTREME` 被启用时， `mem_sections` 数组被动态分配。
+  每一行包含价值 `PAGE_SIZE` 的 `mem_section` 对象，行数的计算是为了适应所有的
+  内存区。
+
+架构设置代码应该调用sparse_init()来初始化内存区和内存映射。
+
+通过SPARSEMEM，有两种可能的方式将PFN转换为相应的 `struct page` --"classic sparse"和
+ "sparse vmemmap"。选择是在构建时进行的，它由 `CONFIG_SPARSEMEM_VMEMMAP` 的
+ 值决定。
+
+Classic sparse在page->flags中编码了一个页面的段号，并使用PFN的高位来访问映射该页
+框的段。在一个区段内，PFN是指向页数组的索引。
+
+Sparse vmemmapvmemmap使用虚拟映射的内存映射来优化pfn_to_page和page_to_pfn操
+作。有一个全局的 `struct page *vmemmap` 指针，指向一个虚拟连续的 `struct page`
+对象阵列。PFN是该数组的一个索引，`struct page` 从 `vmemmap` 的偏移量是该页的PFN。
+
+为了使用vmemmap，一个架构必须保留一个虚拟地址的范围，以映射包含内存映射的物理页，并
+确保 `vmemmap`指向该范围。此外，架构应该实现 :c:func:`vmemmap_populate` 方法，
+它将分配物理内存并为虚拟内存映射创建页表。如果一个架构对vmemmap映射没有任何特殊要求，
+它可以使用通用内存管理提供的默认 :c:func:`vmemmap_populate_basepages`。
+
+虚拟映射的内存映射允许将持久性内存设备的 `struct page` 对象存储在这些设备上预先分
+配的存储中。这种存储用vmem_altmap结构表示，最终通过一长串的函数调用传递给
+vmemmap_populate()。vmemmap_populate()实现可以使用 `vmem_altmap` 和
+:c:func:`vmemmap_alloc_block_buf` 助手来分配持久性内存设备上的内存映射。
+
+ZONE_DEVICE
+===========
+`ZONE_DEVICE` 设施建立在 `SPARSEM_VMEMMAP` 之上，为设备驱动识别的物理地址范
+围提供 `struct page` `mem_map` 服务。 `ZONE_DEVICE` 的 "设备" 方面与以下
+事实有关：这些地址范围的页面对象从未被在线标记过，而且必须对设备进行引用，而不仅仅
+是页面，以保持内存被“锁定”以便使用。 `ZONE_DEVICE` ，通过 :c:func:`devm_memremap_pages` ，
+为给定的pfns范围执行足够的内存热插拔来开启 :c:func:`pfn_to_page`，
+:c:func:`page_to_pfn`, ，和 :c:func:`get_user_pages` 服务。由于页面引
+用计数永远不会低于1，所以页面永远不会被追踪为空闲内存，页面的 `struct list_head lru`
+空间被重新利用，用于向映射该内存的主机设备/驱动程序进行反向引用。
+
+虽然 `SPARSEMEM` 将内存作为一个区段的集合，可以选择收集并合成内存块，但
+`ZONE_DEVICE` 用户需要更小的颗粒度来填充 `mem_map` 。鉴于 `ZONE_DEVICE`
+内存从未被在线标记，因此它的内存范围从未通过sysfs内存热插拔api暴露在内存块边界
+上。这个实现依赖于这种缺乏用户接口的约束，允许子段大小的内存范围被指定给
+:c:func:`arch_add_memory` ，即内存热插拔的上半部分。子段支持允许2MB作为
+:c:func:`devm_memremap_pages` 的跨架构通用对齐颗粒度。
+
+`ZONE_DEVICE` 的用户是:
+
+* pmem: 通过DAX映射将平台持久性内存作为直接I/O目标使用。
+
+* hmm: 用 `->page_fault()` 和 `->page_free()` 事件回调扩展 `ZONE_DEVICE` ，
+  以允许设备驱动程序协调与设备内存相关的内存管理事件，通常是GPU内存。参见/vm/hmm.rst。
+
+* p2pdma: 创建 `struct page` 对象，允许PCI/E拓扑结构中的peer设备协调它们之间的
+  直接DMA操作，即绕过主机内存。
diff --git a/Documentation/translations/zh_CN/vm/mmu_notifier.rst b/Documentation/translations/zh_CN/vm/mmu_notifier.rst
new file mode 100644
index 000000000000..b29a37b33628
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/mmu_notifier.rst
@@ -0,0 +1,97 @@
+:Original: Documentation/vm/mmu_notifier.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+
+
+什么时候需要页表锁内通知？
+==========================
+
+当清除一个pte/pmd时，我们可以选择通过在页表锁下（通知版的\*_clear_flush调用
+mmu_notifier_invalidate_range）通知事件。但这种通知并不是在所有情况下都需要的。
+
+对于二级TLB（非CPU TLB），如IOMMU TLB或设备TLB（当设备使用类似ATS/PASID的东西让
+IOMMU走CPU页表来访问进程的虚拟地址空间）。只有两种情况需要在清除pte/pmd时在持有页
+表锁的同时通知这些二级TLB：
+
+  A) 在mmu_notifier_invalidate_range_end()之前，支持页的地址被释放。
+  B) 一个页表项被更新以指向一个新的页面（COW，零页上的写异常，__replace_page()，...）。
+
+情况A很明显，你不想冒风险让设备写到一个现在可能被一些完全不同的任务使用的页面。
+
+情况B更加微妙。为了正确起见，它需要按照以下序列发生:
+
+  - 上页表锁
+  - 清除页表项并通知 ([pmd/pte]p_huge_clear_flush_notify())
+  - 设置页表项以指向新页
+
+如果在设置新的pte/pmd值之前，清除页表项之后没有进行通知，那么你就会破坏设备的C11或
+C++11等内存模型。
+
+考虑以下情况（设备使用类似于ATS/PASID的功能）。
+
+两个地址addrA和addrB，这样|addrA - addrB| >= PAGE_SIZE，我们假设它们是COW的
+写保护（B的其他情况也适用）。
+
+::
+
+ [Time N] --------------------------------------------------------------------
+ CPU-thread-0  {尝试写到addrA}
+ CPU-thread-1  {尝试写到addrB}
+ CPU-thread-2  {}
+ CPU-thread-3  {}
+ DEV-thread-0  {读取addrA并填充设备TLB}
+ DEV-thread-2  {读取addrB并填充设备TLB}
+ [Time N+1] ------------------------------------------------------------------
+ CPU-thread-0  {COW_step0: {mmu_notifier_invalidate_range_start(addrA)}}
+ CPU-thread-1  {COW_step0: {mmu_notifier_invalidate_range_start(addrB)}}
+ CPU-thread-2  {}
+ CPU-thread-3  {}
+ DEV-thread-0  {}
+ DEV-thread-2  {}
+ [Time N+2] ------------------------------------------------------------------
+ CPU-thread-0  {COW_step1: {更新页表以指向addrA的新页}}
+ CPU-thread-1  {COW_step1: {更新页表以指向addrB的新页}}
+ CPU-thread-2  {}
+ CPU-thread-3  {}
+ DEV-thread-0  {}
+ DEV-thread-2  {}
+ [Time N+3] ------------------------------------------------------------------
+ CPU-thread-0  {preempted}
+ CPU-thread-1  {preempted}
+ CPU-thread-2  {写入addrA，这是对新页面的写入}
+ CPU-thread-3  {}
+ DEV-thread-0  {}
+ DEV-thread-2  {}
+ [Time N+3] ------------------------------------------------------------------
+ CPU-thread-0  {preempted}
+ CPU-thread-1  {preempted}
+ CPU-thread-2  {}
+ CPU-thread-3  {写入addrB，这是一个写入新页的过程}
+ DEV-thread-0  {}
+ DEV-thread-2  {}
+ [Time N+4] ------------------------------------------------------------------
+ CPU-thread-0  {preempted}
+ CPU-thread-1  {COW_step3: {mmu_notifier_invalidate_range_end(addrB)}}
+ CPU-thread-2  {}
+ CPU-thread-3  {}
+ DEV-thread-0  {}
+ DEV-thread-2  {}
+ [Time N+5] ------------------------------------------------------------------
+ CPU-thread-0  {preempted}
+ CPU-thread-1  {}
+ CPU-thread-2  {}
+ CPU-thread-3  {}
+ DEV-thread-0  {从旧页中读取addrA}
+ DEV-thread-2  {从新页面读取addrB}
+
+所以在这里，因为在N+2的时候，清空页表项没有和通知一起作废二级TLB，设备在看到addrA的新值之前
+就看到了addrB的新值。这就破坏了设备的总内存序。
+
+当改变一个pte的写保护或指向一个新的具有相同内容的写保护页（KSM）时，将mmu_notifier_invalidate_range
+调用延迟到页表锁外的mmu_notifier_invalidate_range_end()是可以的。即使做页表更新的线程
+在释放页表锁后但在调用mmu_notifier_invalidate_range_end()前被抢占，也是如此。
diff --git a/Documentation/translations/zh_CN/vm/numa.rst b/Documentation/translations/zh_CN/vm/numa.rst
new file mode 100644
index 000000000000..6af412b924ad
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/numa.rst
@@ -0,0 +1,101 @@
+:Original: Documentation/vm/numa.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+
+始于1999年11月，作者： <kanoj@sgi.com>
+
+==========================
+何为非统一内存访问(NUMA)？
+==========================
+
+这个问题可以从几个视角来回答：硬件观点和Linux软件视角。
+
+从硬件角度看，NUMA系统是一个由多个组件或装配组成的计算机平台，每个组件可能包含0个或更多的CPU、
+本地内存和/或IO总线。为了简洁起见，并将这些物理组件/装配的硬件视角与软件抽象区分开来，我们在
+本文中称这些组件/装配为“单元”。
+
+每个“单元”都可以看作是系统的一个SMP[对称多处理器]子集——尽管独立的SMP系统所需的一些组件可能
+不会在任何给定的单元上填充。NUMA系统的单元通过某种系统互连连接在一起——例如，交叉开关或点对点
+链接是NUMA系统互连的常见类型。这两种类型的互连都可以聚合起来，以创建NUMA平台，其中的单元与其
+他单元有多个距离。
+
+对于Linux，感兴趣的NUMA平台主要是所谓的缓存相干NUMA--简称ccNUMA系统系统。在ccNUMA系统中，
+所有的内存都是可见的，并且可以从连接到任何单元的任何CPU中访问，缓存一致性是由处理器缓存和/或
+系统互连在硬件中处理。
+
+内存访问时间和有效的内存带宽取决于包含CPU的单元或进行内存访问的IO总线距离包含目标内存的单元
+有多远。例如，连接到同一单元的CPU对内存的访问将比访问其他远程单元的内存经历更快的访问时间和
+更高的带宽。 NUMA平台可以在任何给定单元上访问多种远程距离的（其他）单元。
+
+平台供应商建立NUMA系统并不只是为了让软件开发人员的生活变得有趣。相反，这种架构是提供可扩展
+内存带宽的一种手段。然而，为了实现可扩展的内存带宽，系统和应用软件必须安排大部分的内存引用
+[cache misses]到“本地”内存——同一单元的内存，如果有的话——或者到最近的有内存的单元。
+
+这就自然而然有了Linux软件对NUMA系统的视角:
+
+Linux将系统的硬件资源划分为多个软件抽象，称为“节点”。Linux将节点映射到硬件平台的物理单元
+上，对一些架构的细节进行了抽象。与物理单元一样，软件节点可能包含0或更多的CPU、内存和/或IO
+总线。同样，对“较近”节点的内存访问——映射到较近单元的节点——通常会比对较远单元的访问经历更快
+的访问时间和更高的有效带宽。
+
+对于一些架构，如x86，Linux将“隐藏”任何代表没有内存连接的物理单元的节点，并将连接到该单元
+的任何CPU重新分配到代表有内存的单元的节点上。因此，在这些架构上，我们不能假设Linux将所有
+的CPU与一个给定的节点相关联，会看到相同的本地内存访问时间和带宽。
+
+此外，对于某些架构，同样以x86为例，Linux支持对额外节点的仿真。对于NUMA仿真，Linux会将现
+有的节点或者非NUMA平台的系统内存分割成多个节点。每个模拟的节点将管理底层单元物理内存的一部
+分。NUMA仿真对于在非NUMA平台上测试NUMA内核和应用功能是非常有用的，当与cpusets一起使用时，
+可以作为一种内存资源管理机制。[见 Documentation/admin-guide/cgroup-v1/cpusets.rst]
+
+对于每个有内存的节点，Linux构建了一个独立的内存管理子系统，有自己的空闲页列表、使用中页列表、
+使用统计和锁来调解访问。此外，Linux为每个内存区[DMA、DMA32、NORMAL、HIGH_MEMORY、MOVABLE
+中的一个或多个]构建了一个有序的“区列表”。zonelist指定了当一个选定的区/节点不能满足分配请求
+时要访问的区/节点。当一个区没有可用的内存来满足请求时，这种情况被称为“overflow 溢出”或
+“fallback 回退”。
+
+由于一些节点包含多个包含不同类型内存的区，Linux必须决定是否对区列表进行排序，使分配回退到不同
+节点上的相同区类型，或同一节点上的不同区类型。这是一个重要的考虑因素，因为有些区，如DMA或DMA32，
+代表了相对稀缺的资源。Linux选择了一个默认的Node ordered zonelist。这意味着在使用按NUMA距
+离排序的远程节点之前，它会尝试回退到同一节点的其他分区。
+
+默认情况下，Linux会尝试从执行请求的CPU被分配到的节点中满足内存分配请求。具体来说，Linux将试
+图从请求来源的节点的适当分区列表中的第一个节点进行分配。这被称为“本地分配”。如果“本地”节点不能
+满足请求，内核将检查所选分区列表中其他节点的区域，寻找列表中第一个能满足请求的区域。
+
+本地分配将倾向于保持对分配的内存的后续访问 “本地”的底层物理资源和系统互连——只要内核代表其分配
+一些内存的任务后来不从该内存迁移。Linux调度器知道平台的NUMA拓扑结构——体现在“调度域”数据结构
+中[见 Documentation/scheduler/sched-domains.rst]——并且调度器试图尽量减少任务迁移到遥
+远的调度域中。然而，调度器并没有直接考虑到任务的NUMA足迹。因此，在充分不平衡的情况下，任务可
+以在节点之间迁移，远离其初始节点和内核数据结构。
+
+系统管理员和应用程序设计者可以使用各种CPU亲和命令行接口，如taskset(1)和numactl(1)，以及程
+序接口，如sched_setaffinity(2)，来限制任务的迁移，以改善NUMA定位。此外，人们可以使用
+Linux NUMA内存策略修改内核的默认本地分配行为。 [见
+:ref:`Documentation/admin-guide/mm/numa_memory_policy.rst <numa_memory_policy>`].
+
+系统管理员可以使用控制组和CPUsets限制非特权用户在调度或NUMA命令和功能中可以指定的CPU和节点
+的内存。 [见 Documentation/admin-guide/cgroup-v1/cpusets.rst]
+
+在不隐藏无内存节点的架构上，Linux会在分区列表中只包括有内存的区域[节点]。这意味着对于一个无
+内存的节点，“本地内存节点”——CPU节点的分区列表中的第一个区域的节点——将不是节点本身。相反，它
+将是内核在建立分区列表时选择的离它最近的有内存的节点。所以，默认情况下，本地分配将由内核提供
+最近的可用内存来完成。这是同一机制的结果，该机制允许这种分配在一个包含内存的节点溢出时回退到
+其他附近的节点。
+
+一些内核分配不希望或不能容忍这种分配回退行为。相反，他们想确保他们从指定的节点获得内存，或者
+得到通知说该节点没有空闲内存。例如，当一个子系统分配每个CPU的内存资源时，通常是这种情况。
+
+一个典型的分配模式是使用内核的numa_node_id()或CPU_to_node()函数获得“当前CPU”所在节点的
+节点ID，然后只从返回的节点ID请求内存。当这样的分配失败时，请求的子系统可以恢复到它自己的回退
+路径。板块内核内存分配器就是这样的一个例子。或者，子系统可以选择在分配失败时禁用或不启用自己。
+内核分析子系统就是这样的一个例子。
+
+如果架构支持——不隐藏无内存节点，那么连接到无内存节点的CPU将总是产生回退路径的开销，或者一些
+子系统如果试图完全从无内存的节点分配内存，将无法初始化。为了透明地支持这种架构，内核子系统可
+以使用numa_mem_id()或cpu_to_mem()函数来定位调用或指定CPU的“本地内存节点”。同样，这是同
+一个节点，默认的本地页分配将从这个节点开始尝试。
diff --git a/Documentation/translations/zh_CN/vm/overcommit-accounting.rst b/Documentation/translations/zh_CN/vm/overcommit-accounting.rst
new file mode 100644
index 000000000000..8765cb118f24
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/overcommit-accounting.rst
@@ -0,0 +1,86 @@
+:Original: Documentation/vm/overcommit-accounting.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+
+
+==============
+超量使用审计
+==============
+
+Linux内核支持下列超量使用处理模式
+
+0
+	启发式超量使用处理。拒绝明显的地址空间超量使用。用于一个典型的系统。
+	它确保严重的疯狂分配失败，同时允许超量使用以减少swap的使用。在这种模式下，
+	允许root分配稍多的内存。这是默认的。
+1
+	总是超量使用。适用于一些科学应用。经典的例子是使用稀疏数组的代码，只是依赖
+	几乎完全由零页组成的虚拟内存
+
+2
+	不超量使用。系统提交的总地址空间不允许超过swap+一个可配置的物理RAM的数量
+	（默认为50%）。根据你使用的数量，在大多数情况下，这意味着一个进程在访问页面时
+	不会被杀死，但会在内存分配上收到相应的错误。
+
+	对于那些想保证他们的内存分配在未来可用而又不需要初始化每一个页面的应用程序来说
+	是很有用的。
+
+超量使用策略是通过sysctl  `vm.overcommit_memory` 设置的。
+
+可以通过 `vm.overcommit_ratio` （百分比）或 `vm.overcommit_kbytes` （绝对值）
+来设置超限数量。这些只有在 `vm.overcommit_memory` 被设置为2时才有效果。
+
+在 ``/proc/meminfo`` 中可以分别以CommitLimit和Committed_AS的形式查看当前
+的超量使用和提交量。
+
+陷阱
+====
+
+C语言的堆栈增长是一个隐含的mremap。如果你想得到绝对的保证，并在接近边缘的地方运行，
+你 **必须** 为你认为你需要的最大尺寸的堆栈进行mmap。对于典型的堆栈使用来说，这并
+不重要，但如果你真的非常关心的话，这就是一个值得关注的案例。
+
+
+在模式2中，MAP_NORESERVE标志被忽略。
+
+
+它是如何工作的
+==============
+
+超量使用是基于以下规则
+
+对于文件映射
+	| SHARED or READ-only	-	0 cost (该文件是映射而不是交换)
+	| PRIVATE WRITABLE	-	每个实例的映射大小
+
+对于匿名或者 ``/dev/zero`` 映射
+	| SHARED			-	映射的大小
+	| PRIVATE READ-only	-	0 cost (但作用不大)
+	| PRIVATE WRITABLE	-	每个实例的映射大小
+
+额外的计数
+	| 通过mmap制作可写副本的页面
+	| 从同一池中提取的shmfs内存
+
+状态
+====
+
+*	我们核算mmap内存映射
+*	我们核算mprotect在提交中的变化
+*	我们核算mremap的大小变化
+*	我们的审计 brk
+*	审计munmap
+*	我们在/proc中报告commit 状态
+*	核对并检查分叉的情况
+*	审查堆栈处理/执行中的构建
+*	叙述SHMfs的情况
+*	实现实际限制的执行
+
+待续
+====
+*	ptrace 页计数（这很难）。
diff --git a/Documentation/translations/zh_CN/vm/page_frags.rst b/Documentation/translations/zh_CN/vm/page_frags.rst
new file mode 100644
index 000000000000..ad27fed33634
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/page_frags.rst
@@ -0,0 +1,38 @@
+:Original: Documentation/vm/page_frag.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+
+========
+页面片段
+========
+
+一个页面片段是一个任意长度的任意偏移的内存区域，它位于一个0或更高阶的复合页面中。
+该页中的多个碎片在该页的引用计数器中被单独计算。
+
+page_frag函数，page_frag_alloc和page_frag_free，为页面片段提供了一个简单
+的分配框架。这被网络堆栈和网络设备驱动使用，以提供一个内存的支持区域，作为
+sk_buff->head使用，或者用于skb_shared_info的 “frags” 部分。
+
+为了使用页面片段API，需要一个支持页面片段的缓冲区。这为碎片分配提供了一个中心点，
+并允许多个调用使用一个缓存的页面。这样做的好处是可以避免对get_page的多次调用，
+这在分配时开销可能会很大。然而，由于这种缓存的性质，要求任何对缓存的调用都要受到每
+个CPU的限制，或者每个CPU的限制，并在执行碎片分配时强制禁止中断。
+
+网络堆栈在每个CPU使用两个独立的缓存来处理碎片分配。netdev_alloc_cache被使用
+netdev_alloc_frag和__netdev_alloc_skb调用的调用者使用。napi_alloc_cache
+被调用__napi_alloc_frag和__napi_alloc_skb的调用者使用。这两个调用的主要区别是
+它们可能被调用的环境。“netdev” 前缀的函数可以在任何上下文中使用，因为这些函数
+将禁用中断，而 ”napi“ 前缀的函数只可以在softirq上下文中使用。
+
+许多网络设备驱动程序使用类似的方法来分配页面片段，但页面片段是在环或描述符级别上
+缓存的。为了实现这些情况，有必要提供一种拆解页面缓存的通用方法。出于这个原因，
+__page_frag_cache_drain被实现了。它允许通过一次调用从一个页面释放多个引用。
+这样做的好处是，它允许清理被添加到一个页面的多个引用，以避免每次分配都调用
+get_page。
+
+Alexander Duyck，2016年11月29日。
diff --git a/Documentation/translations/zh_CN/vm/page_owner.rst b/Documentation/translations/zh_CN/vm/page_owner.rst
new file mode 100644
index 000000000000..9e951fabba9d
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/page_owner.rst
@@ -0,0 +1,116 @@
+:Original: Documentation/vm/page_owner.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+
+================================
+page owner: 跟踪谁分配的每个页面
+================================
+
+概述
+====
+
+page owner是用来追踪谁分配的每一个页面。它可以用来调试内存泄漏或找到内存占用者。
+当分配发生时，有关分配的信息，如调用堆栈和页面的顺序被存储到每个页面的特定存储中。
+当我们需要了解所有页面的状态时，我们可以获得并分析这些信息。
+
+尽管我们已经有了追踪页面分配/释放的tracepoint，但用它来分析谁分配的每个页面是
+相当复杂的。我们需要扩大跟踪缓冲区，以防止在用户空间程序启动前出现重叠。而且，启
+动的程序会不断地将跟踪缓冲区转出，供以后分析，这将会改变系统的行为，会产生更多的
+可能性，而不是仅仅保留在内存中，所以不利于调试。
+
+页面所有者也可以用于各种目的。例如，可以通过每个页面的gfp标志信息获得精确的碎片
+统计。如果启用了page owner，它就已经实现并激活了。我们非常欢迎其他用途。
+
+page owner在默认情况下是禁用的。所以，如果你想使用它，你需要在你的启动cmdline
+中加入"page_owner=on"。如果内核是用page owner构建的，并且由于没有启用启动
+选项而在运行时禁用page owner，那么运行时的开销是很小的。如果在运行时禁用，它不
+需要内存来存储所有者信息，所以没有运行时内存开销。而且，页面所有者在页面分配器的
+热路径中只插入了两个不可能的分支，如果不启用，那么分配就会像没有页面所有者的内核
+一样进行。这两个不可能的分支应该不会影响到分配的性能，特别是在静态键跳转标签修补
+功能可用的情况下。以下是由于这个功能而导致的内核代码大小的变化。
+
+- 没有page owner::
+
+   text    data     bss     dec     hex filename
+   48392   2333     644   51369    c8a9 mm/page_alloc.o
+
+- 有page owner::
+
+   text    data     bss     dec     hex filename
+   48800   2445     644   51889    cab1 mm/page_alloc.o
+   6662     108      29    6799    1a8f mm/page_owner.o
+   1025       8       8    1041     411 mm/page_ext.o
+
+虽然总共增加了8KB的代码，但page_alloc.o增加了520字节，其中不到一半是在hotpath
+中。构建带有page owner的内核，并在需要时打开它，将是调试内核内存问题的最佳选择。
+
+有一个问题是由实现细节引起的。页所有者将信息存储到struct page扩展的内存中。这
+个内存的初始化时间比稀疏内存系统中的页面分配器启动的时间要晚一些，所以，在初始化
+之前，许多页面可以被分配，但它们没有所有者信息。为了解决这个问题，这些早期分配的
+页面在初始化阶段被调查并标记为分配。虽然这并不意味着它们有正确的所有者信息，但至
+少，我们可以更准确地判断该页是否被分配。在2GB内存的x86-64虚拟机上，有13343
+个早期分配的页面被捕捉和标记，尽管它们大部分是由结构页扩展功能分配的。总之，在这
+之后，没有任何页面处于未追踪状态。
+
+使用方法
+========
+
+1) 构建用户空间的帮助::
+
+	cd tools/vm
+	make page_owner_sort
+
+2) 启用page owner: 添加 "page_owner=on" 到 boot cmdline.
+
+3) 做你想调试的工作。
+
+4) 分析来自页面所有者的信息::
+
+	cat /sys/kernel/debug/page_owner > page_owner_full.txt
+	./page_owner_sort page_owner_full.txt sorted_page_owner.txt
+
+   ``page_owner_full.txt`` 的一般输出情况如下(输出信息无翻译价值)::
+
+	Page allocated via order XXX, ...
+	PFN XXX ...
+	// Detailed stack
+
+	Page allocated via order XXX, ...
+	PFN XXX ...
+	// Detailed stack
+
+   ``page_owner_sort`` 工具忽略了 ``PFN`` 行，将剩余的行放在buf中，使用regexp提
+   取页序值，计算buf的次数和页数，最后根据参数进行排序。
+
+   在 ``sorted_page_owner.txt`` 中可以看到关于谁分配了每个页面的结果。一般输出::
+
+	XXX times, XXX pages:
+	Page allocated via order XXX, ...
+	// Detailed stack
+
+   默认情况下， ``page_owner_sort`` 是根据buf的时间来排序的。如果你想
+   按buf的页数排序，请使用-m参数。详细的参数是:
+
+   基本函数:
+
+	Sort:
+		-a		按内存分配时间排序
+		-m		按总内存排序
+		-p		按pid排序。
+		-P		按tgid排序。
+		-r		按内存释放时间排序。
+		-s		按堆栈跟踪排序。
+		-t		按时间排序（默认）。
+
+   其它函数:
+
+	Cull:
+		-c		通过比较堆栈跟踪而不是总块来进行剔除。
+
+	Filter:
+		-f		过滤掉内存已被释放的块的信息。
diff --git a/Documentation/translations/zh_CN/vm/page_table_check.rst b/Documentation/translations/zh_CN/vm/page_table_check.rst
new file mode 100644
index 000000000000..a29fc1b360e6
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/page_table_check.rst
@@ -0,0 +1,56 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+:Original: Documentation/vm/page_table_check.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+
+========
+页表检查
+========
+
+概述
+====
+
+页表检查允许通过确保防止某些类型的内存损坏来强化内核。
+
+当新的页面可以从用户空间访问时，页表检查通过将它们的页表项（PTEs PMD等）添加到页表中来执行额外
+的验证。
+
+在检测到损坏的情况下，内核会被崩溃。页表检查有一个小的性能和内存开销。因此，它在默认情况下是禁用
+的，但是在额外的加固超过性能成本的系统上，可以选择启用。另外，由于页表检查是同步的，它可以帮助调
+试双映射内存损坏问题，在错误的映射发生时崩溃内核，而不是在内存损坏错误发生后内核崩溃。
+
+双重映射检测逻辑
+================
+
++-------------------+-------------------+-------------------+------------------+
+| Current Mapping   | New mapping       | Permissions       | Rule             |
++===================+===================+===================+==================+
+| Anonymous         | Anonymous         | Read              | Allow            |
++-------------------+-------------------+-------------------+------------------+
+| Anonymous         | Anonymous         | Read / Write      | Prohibit         |
++-------------------+-------------------+-------------------+------------------+
+| Anonymous         | Named             | Any               | Prohibit         |
++-------------------+-------------------+-------------------+------------------+
+| Named             | Anonymous         | Any               | Prohibit         |
++-------------------+-------------------+-------------------+------------------+
+| Named             | Named             | Any               | Allow            |
++-------------------+-------------------+-------------------+------------------+
+
+启用页表检查
+============
+
+用以下方法构建内核:
+
+- PAGE_TABLE_CHECK=y
+  注意，它只能在ARCH_SUPPORTS_PAGE_TABLE_CHECK可用的平台上启用。
+
+- 使用 "page_table_check=on" 内核参数启动。
+
+可以选择用PAGE_TABLE_CHECK_ENFORCED来构建内核，以便在没有额外的内核参数的情况下获得页表
+支持。
diff --git a/Documentation/translations/zh_CN/vm/remap_file_pages.rst b/Documentation/translations/zh_CN/vm/remap_file_pages.rst
new file mode 100644
index 000000000000..af6b7e28af23
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/remap_file_pages.rst
@@ -0,0 +1,32 @@
+:Original: Documentation/vm/remap_file_pages.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+
+==============================
+remap_file_pages()系统调用
+==============================
+
+remap_file_pages()系统调用被用来创建一个非线性映射，也就是说，在这个映射中，
+文件的页面被无序映射到内存中。使用remap_file_pages()比重复调用mmap(2)的好
+处是，前者不需要内核创建额外的VMA（虚拟内存区）数据结构。
+
+支持非线性映射需要在内核虚拟内存子系统中编写大量的non-trivial的代码，包括热
+路径。另外，为了使非线性映射工作，内核需要一种方法来区分正常的页表项和带有文件
+偏移的项（pte_file）。内核为达到这个目的在PTE中保留了标志。PTE标志是稀缺资
+源，特别是在某些CPU架构上。如果能腾出这个标志用于其他用途就更好了。
+
+幸运的是，在生活中并没有很多remap_file_pages()的用户。只知道有一个企业的RDBMS
+实现在32位系统上使用这个系统调用来映射比32位虚拟地址空间线性尺寸更大的文件。
+由于64位系统的广泛使用，这种使用情况已经不重要了。
+
+syscall被废弃了，现在用一个模拟来代替它。仿真会创建新的VMA，而不是非线性映射。
+对于remap_file_pages()的少数用户来说，它的工作速度会变慢，但ABI被保留了。
+
+仿真的一个副作用（除了性能之外）是，由于额外的VMA，用户可以更容易达到
+vm.max_map_count的限制。关于限制的更多细节，请参见DEFAULT_MAX_MAP_COUNT
+的注释。
diff --git a/Documentation/translations/zh_CN/vm/split_page_table_lock.rst b/Documentation/translations/zh_CN/vm/split_page_table_lock.rst
new file mode 100644
index 000000000000..50694d97c426
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/split_page_table_lock.rst
@@ -0,0 +1,96 @@
+:Original: Documentation/vm/split_page_table_lock.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+
+=================================
+分页表锁（split page table lock）
+=================================
+
+最初，mm->page_table_lock spinlock保护了mm_struct的所有页表。但是这种方
+法导致了多线程应用程序的缺页异常可扩展性差，因为对锁的争夺很激烈。为了提高可扩
+展性，我们引入了分页表锁。
+
+有了分页表锁，我们就有了单独的每张表锁来顺序化对表的访问。目前，我们对PTE和
+PMD表使用分页锁。对高层表的访问由mm->page_table_lock保护。
+
+有一些辅助工具来锁定/解锁一个表和其他访问器函数:
+
+ - pte_offset_map_lock()
+	映射pte并获取PTE表锁，返回所取锁的指针；
+ - pte_unmap_unlock()
+	解锁和解映射PTE表；
+ - pte_alloc_map_lock()
+	如果需要的话，分配PTE表并获取锁，如果分配失败，返回已获取的锁的指针
+	或NULL；
+ - pte_lockptr()
+	返回指向PTE表锁的指针；
+ - pmd_lock()
+	取得PMD表锁，返回所取锁的指针。
+ - pmd_lockptr()
+	返回指向PMD表锁的指针；
+
+如果CONFIG_SPLIT_PTLOCK_CPUS（通常为4）小于或等于NR_CPUS，则在编译
+时启用PTE表的分页表锁。如果分页锁被禁用，所有的表都由mm->page_table_lock
+来保护。
+
+如果PMD表启用了分页锁，并且架构支持它，那么PMD表的分页锁就会被启用（见
+下文）。
+
+Hugetlb 和分页表锁
+==================
+
+Hugetlb可以支持多种页面大小。我们只对PMD级别使用分页锁，但不对PUD使用。
+
+Hugetlb特定的辅助函数:
+
+ - huge_pte_lock()
+	对PMD_SIZE页面采取pmd分割锁，否则mm->page_table_lock；
+ - huge_pte_lockptr()
+	返回指向表锁的指针。
+
+架构对分页表锁的支持
+====================
+
+没有必要特别启用PTE分页表锁：所有需要的东西都由pgtable_pte_page_ctor()
+和pgtable_pte_page_dtor()完成，它们必须在PTE表分配/释放时被调用。
+
+确保架构不使用slab分配器来分配页表：slab使用page->slab_cache来分配其页
+面。这个区域与page->ptl共享存储。
+
+PMD分页锁只有在你有两个以上的页表级别时才有意义。
+
+启用PMD分页锁需要在PMD表分配时调用pgtable_pmd_page_ctor()，在释放时调
+用pgtable_pmd_page_dtor()。
+
+分配通常发生在pmd_alloc_one()中，释放发生在pmd_free()和pmd_free_tlb()
+中，但要确保覆盖所有的PMD表分配/释放路径：即X86_PAE在pgd_alloc()中预先
+分配一些PMD。
+
+一切就绪后，你可以设置CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK。
+
+注意：pgtable_pte_page_ctor()和pgtable_pmd_page_ctor()可能失败--必
+须正确处理。
+
+page->ptl
+=========
+
+page->ptl用于访问分割页表锁，其中'page'是包含该表的页面struct page。它
+与page->private（以及union中的其他几个字段）共享存储。
+
+为了避免增加struct page的大小并获得最佳性能，我们使用了一个技巧:
+
+ - 如果spinlock_t适合于long，我们使用page->ptr作为spinlock，这样我们
+   就可以避免间接访问并节省一个缓存行。
+ - 如果spinlock_t的大小大于long的大小，我们使用page->ptl作为spinlock_t
+   的指针并动态分配它。这允许在启用DEBUG_SPINLOCK或DEBUG_LOCK_ALLOC的
+   情况下使用分页锁，但由于间接访问而多花了一个缓存行。
+
+PTE表的spinlock_t分配在pgtable_pte_page_ctor()中，PMD表的spinlock_t
+分配在pgtable_pmd_page_ctor()中。
+
+请不要直接访问page->ptl - -使用适当的辅助函数。
diff --git a/Documentation/translations/zh_CN/vm/z3fold.rst b/Documentation/translations/zh_CN/vm/z3fold.rst
new file mode 100644
index 000000000000..57204aa08caa
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/z3fold.rst
@@ -0,0 +1,31 @@
+:Original: Documentation/vm/z3fold.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+
+======
+z3fold
+======
+
+z3fold是一个专门用于存储压缩页的分配器。它被设计为每个物理页最多可以存储三个压缩页。
+它是zbud的衍生物，允许更高的压缩率，保持其前辈的简单性和确定性。
+
+z3fold和zbud的主要区别是:
+
+* 与zbud不同的是，z3fold允许最大的PAGE_SIZE分配。
+* z3fold在其页面中最多可以容纳3个压缩页面
+* z3fold本身没有输出任何API，因此打算通过zpool的API来使用
+
+为了保持确定性和简单性，z3fold，就像zbud一样，总是在每页存储一个整数的压缩页，但是
+它最多可以存储3页，不像zbud最多可以存储2页。因此压缩率达到2.7倍左右，而zbud的压缩
+率是1.7倍左右。
+
+不像zbud（但也像zsmalloc），z3fold_alloc()那样不返回一个可重复引用的指针。相反，它
+返回一个无符号长句柄，它编码了被分配对象的实际位置。
+
+保持有效的压缩率接近于zsmalloc，z3fold不依赖于MMU的启用，并提供更可预测的回收行
+为，这使得它更适合于小型和反应迅速的系统。
diff --git a/Documentation/translations/zh_CN/vm/zsmalloc.rst b/Documentation/translations/zh_CN/vm/zsmalloc.rst
new file mode 100644
index 000000000000..29e9c70a8eb6
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/zsmalloc.rst
@@ -0,0 +1,78 @@
+:Original: Documentation/vm/zs_malloc.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+========
+zsmalloc
+========
+
+这个分配器是为与zram一起使用而设计的。因此，该分配器应该在低内存条件下工作良好。特别是，
+它从未尝试过higher order页面的分配，这在内存压力下很可能会失败。另一方面，如果我们只
+是使用单（0-order）页，它将遭受非常高的碎片化 - 任何大小为PAGE_SIZE/2或更大的对象将
+占据整个页面。这是其前身（xvmalloc）的主要问题之一。
+
+为了克服这些问题，zsmalloc分配了一堆0-order页面，并使用各种"struct page"字段将它
+们链接起来。这些链接的页面作为一个单一的higher order页面，即一个对象可以跨越0-order
+页面的边界。代码将这些链接的页面作为一个实体，称为zspage。
+
+为了简单起见，zsmalloc只能分配大小不超过PAGE_SIZE的对象，因为这满足了所有当前用户的
+要求（在最坏的情况下，页面是不可压缩的，因此以"原样"即未压缩的形式存储）。对于大于这
+个大小的分配请求，会返回失败（见zs_malloc）。
+
+此外，zs_malloc()并不返回一个可重复引用的指针。相反，它返回一个不透明的句柄（无符号
+长），它编码了被分配对象的实际位置。这种间接性的原因是zsmalloc并不保持zspages的永久
+映射，因为这在32位系统上会导致问题，因为内核空间映射的VA区域非常小。因此，在使用分配
+的内存之前，对象必须使用zs_map_object()进行映射以获得一个可用的指针，随后使用
+zs_unmap_object()解除映射。
+
+stat
+====
+
+通过CONFIG_ZSMALLOC_STAT，我们可以通过 ``/sys/kernel/debug/zsmalloc/<user name>``
+看到zsmalloc内部信息。下面是一个统计输出的例子。::
+
+ # cat /sys/kernel/debug/zsmalloc/zram0/classes
+
+ class  size almost_full almost_empty obj_allocated   obj_used pages_used pages_per_zspage
+    ...
+    ...
+     9   176           0            1           186        129          8                4
+    10   192           1            0          2880       2872        135                3
+    11   208           0            1           819        795         42                2
+    12   224           0            1           219        159         12                4
+    ...
+    ...
+
+
+class
+	索引
+size
+	zspage存储对象大小
+almost_empty
+	ZS_ALMOST_EMPTY zspage的数量（见下文）。
+almost_full
+	ZS_ALMOST_FULL zspage的数量(见下图)
+obj_allocated
+	已分配对象的数量
+obj_used
+	分配给用户的对象的数量
+pages_used
+	为该类分配的页数
+pages_per_zspage
+	组成一个zspage的0-order页面的数量
+
+当n <= N / f时，我们将一个zspage分配给ZS_ALMOST_EMPTYfullness组，其中
+
+* n = 已分配对象的数量
+* N = zspage可以存储的对象总数
+* f = fullness_threshold_frac(即，目前是4个)
+
+同样地，我们将zspage分配给:
+
+* ZS_ALMOST_FULL  when n > N / f
+* ZS_EMPTY        when n == 0
+* ZS_FULL         when n == N
diff --git a/Documentation/userspace-api/ioctl/cdrom.rst b/Documentation/userspace-api/ioctl/cdrom.rst
index 682948fc88a3..2ad91dbebd7c 100644
--- a/Documentation/userspace-api/ioctl/cdrom.rst
+++ b/Documentation/userspace-api/ioctl/cdrom.rst
@@ -718,6 +718,9 @@ CDROMPLAYBLK
 
 
 CDROMGETSPINDOWN
+	Obsolete, was ide-cd only
+
+
 	usage::
 
 	  char spindown;
@@ -736,6 +739,9 @@ CDROMGETSPINDOWN
 
 
 CDROMSETSPINDOWN
+	Obsolete, was ide-cd only
+
+
 	usage::
 
 	  char spindown
diff --git a/Documentation/userspace-api/landlock.rst b/Documentation/userspace-api/landlock.rst
index f35552ff19ba..b8ea59493964 100644
--- a/Documentation/userspace-api/landlock.rst
+++ b/Documentation/userspace-api/landlock.rst
@@ -1,14 +1,14 @@
 .. SPDX-License-Identifier: GPL-2.0
 .. Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
 .. Copyright © 2019-2020 ANSSI
-.. Copyright © 2021 Microsoft Corporation
+.. Copyright © 2021-2022 Microsoft Corporation
 
 =====================================
 Landlock: unprivileged access control
 =====================================
 
 :Author: Mickaël Salaün
-:Date: March 2021
+:Date: May 2022
 
 The goal of Landlock is to enable to restrict ambient rights (e.g. global
 filesystem access) for a set of processes.  Because Landlock is a stackable
@@ -18,6 +18,13 @@ is expected to help mitigate the security impact of bugs or
 unexpected/malicious behaviors in user space applications.  Landlock empowers
 any process, including unprivileged ones, to securely restrict themselves.
 
+We can quickly make sure that Landlock is enabled in the running system by
+looking for "landlock: Up and running" in kernel logs (as root): ``dmesg | grep
+landlock || journalctl -kg landlock`` .  Developers can also easily check for
+Landlock support with a :ref:`related system call <landlock_abi_versions>`.  If
+Landlock is not currently supported, we need to :ref:`configure the kernel
+appropriately <kernel_support>`.
+
 Landlock rules
 ==============
 
@@ -29,14 +36,15 @@ the thread enforcing it, and its future children.
 Defining and enforcing a security policy
 ----------------------------------------
 
-We first need to create the ruleset that will contain our rules.  For this
+We first need to define the ruleset that will contain our rules.  For this
 example, the ruleset will contain rules that only allow read actions, but write
 actions will be denied.  The ruleset then needs to handle both of these kind of
-actions.
+actions.  This is required for backward and forward compatibility (i.e. the
+kernel and user space may not know each other's supported restrictions), hence
+the need to be explicit about the denied-by-default access rights.
 
 .. code-block:: c
 
-    int ruleset_fd;
     struct landlock_ruleset_attr ruleset_attr = {
         .handled_access_fs =
             LANDLOCK_ACCESS_FS_EXECUTE |
@@ -51,9 +59,34 @@ actions.
             LANDLOCK_ACCESS_FS_MAKE_SOCK |
             LANDLOCK_ACCESS_FS_MAKE_FIFO |
             LANDLOCK_ACCESS_FS_MAKE_BLOCK |
-            LANDLOCK_ACCESS_FS_MAKE_SYM,
+            LANDLOCK_ACCESS_FS_MAKE_SYM |
+            LANDLOCK_ACCESS_FS_REFER,
     };
 
+Because we may not know on which kernel version an application will be
+executed, it is safer to follow a best-effort security approach.  Indeed, we
+should try to protect users as much as possible whatever the kernel they are
+using.  To avoid binary enforcement (i.e. either all security features or
+none), we can leverage a dedicated Landlock command to get the current version
+of the Landlock ABI and adapt the handled accesses.  Let's check if we should
+remove the `LANDLOCK_ACCESS_FS_REFER` access right which is only supported
+starting with the second version of the ABI.
+
+.. code-block:: c
+
+    int abi;
+
+    abi = landlock_create_ruleset(NULL, 0, LANDLOCK_CREATE_RULESET_VERSION);
+    if (abi < 2) {
+        ruleset_attr.handled_access_fs &= ~LANDLOCK_ACCESS_FS_REFER;
+    }
+
+This enables to create an inclusive ruleset that will contain our rules.
+
+.. code-block:: c
+
+    int ruleset_fd;
+
     ruleset_fd = landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
     if (ruleset_fd < 0) {
         perror("Failed to create a ruleset");
@@ -92,6 +125,11 @@ descriptor.
         return 1;
     }
 
+It may also be required to create rules following the same logic as explained
+for the ruleset creation, by filtering access rights according to the Landlock
+ABI version.  In this example, this is not required because
+`LANDLOCK_ACCESS_FS_REFER` is not allowed by any rule.
+
 We now have a ruleset with one rule allowing read access to ``/usr`` while
 denying all other handled accesses for the filesystem.  The next step is to
 restrict the current thread from gaining more privileges (e.g. thanks to a SUID
@@ -125,6 +163,27 @@ ruleset.
 
 Full working code can be found in `samples/landlock/sandboxer.c`_.
 
+Good practices
+--------------
+
+It is recommended setting access rights to file hierarchy leaves as much as
+possible.  For instance, it is better to be able to have ``~/doc/`` as a
+read-only hierarchy and ``~/tmp/`` as a read-write hierarchy, compared to
+``~/`` as a read-only hierarchy and ``~/tmp/`` as a read-write hierarchy.
+Following this good practice leads to self-sufficient hierarchies that don't
+depend on their location (i.e. parent directories).  This is particularly
+relevant when we want to allow linking or renaming.  Indeed, having consistent
+access rights per directory enables to change the location of such directory
+without relying on the destination directory access rights (except those that
+are required for this operation, see `LANDLOCK_ACCESS_FS_REFER` documentation).
+Having self-sufficient hierarchies also helps to tighten the required access
+rights to the minimal set of data.  This also helps avoid sinkhole directories,
+i.e.  directories where data can be linked to but not linked from.  However,
+this depends on data organization, which might not be controlled by developers.
+In this case, granting read-write access to ``~/tmp/``, instead of write-only
+access, would potentially allow to move ``~/tmp/`` to a non-readable directory
+and still keep the ability to list the content of ``~/tmp/``.
+
 Layers of file path access rights
 ---------------------------------
 
@@ -192,6 +251,58 @@ To be allowed to use :manpage:`ptrace(2)` and related syscalls on a target
 process, a sandboxed process should have a subset of the target process rules,
 which means the tracee must be in a sub-domain of the tracer.
 
+Compatibility
+=============
+
+Backward and forward compatibility
+----------------------------------
+
+Landlock is designed to be compatible with past and future versions of the
+kernel.  This is achieved thanks to the system call attributes and the
+associated bitflags, particularly the ruleset's `handled_access_fs`.  Making
+handled access right explicit enables the kernel and user space to have a clear
+contract with each other.  This is required to make sure sandboxing will not
+get stricter with a system update, which could break applications.
+
+Developers can subscribe to the `Landlock mailing list
+<https://subspace.kernel.org/lists.linux.dev.html>`_ to knowingly update and
+test their applications with the latest available features.  In the interest of
+users, and because they may use different kernel versions, it is strongly
+encouraged to follow a best-effort security approach by checking the Landlock
+ABI version at runtime and only enforcing the supported features.
+
+.. _landlock_abi_versions:
+
+Landlock ABI versions
+---------------------
+
+The Landlock ABI version can be read with the sys_landlock_create_ruleset()
+system call:
+
+.. code-block:: c
+
+    int abi;
+
+    abi = landlock_create_ruleset(NULL, 0, LANDLOCK_CREATE_RULESET_VERSION);
+    if (abi < 0) {
+        switch (errno) {
+        case ENOSYS:
+            printf("Landlock is not supported by the current kernel.\n");
+            break;
+        case EOPNOTSUPP:
+            printf("Landlock is currently disabled.\n");
+            break;
+        }
+        return 0;
+    }
+    if (abi >= 2) {
+        printf("Landlock supports LANDLOCK_ACCESS_FS_REFER.\n");
+    }
+
+The following kernel interfaces are implicitly supported by the first ABI
+version.  Features only supported from a specific version are explicitly marked
+as such.
+
 Kernel interface
 ================
 
@@ -228,21 +339,6 @@ Enforcing a ruleset
 Current limitations
 ===================
 
-File renaming and linking
--------------------------
-
-Because Landlock targets unprivileged access controls, it is needed to properly
-handle composition of rules.  Such property also implies rules nesting.
-Properly handling multiple layers of ruleset, each one of them able to restrict
-access to files, also implies to inherit the ruleset restrictions from a parent
-to its hierarchy.  Because files are identified and restricted by their
-hierarchy, moving or linking a file from one directory to another implies to
-propagate the hierarchy constraints.  To protect against privilege escalations
-through renaming or linking, and for the sake of simplicity, Landlock currently
-limits linking and renaming to the same directory.  Future Landlock evolutions
-will enable more flexibility for renaming and linking, with dedicated ruleset
-flags.
-
 Filesystem topology modification
 --------------------------------
 
@@ -267,8 +363,8 @@ restrict such paths with dedicated ruleset flags.
 Ruleset layers
 --------------
 
-There is a limit of 64 layers of stacked rulesets.  This can be an issue for a
-task willing to enforce a new ruleset in complement to its 64 inherited
+There is a limit of 16 layers of stacked rulesets.  This can be an issue for a
+task willing to enforce a new ruleset in complement to its 16 inherited
 rulesets.  Once this limit is reached, sys_landlock_restrict_self() returns
 E2BIG.  It is then strongly suggested to carefully build rulesets once in the
 life of a thread, especially for applications able to launch other applications
@@ -281,6 +377,44 @@ Memory usage
 Kernel memory allocated to create rulesets is accounted and can be restricted
 by the Documentation/admin-guide/cgroup-v1/memory.rst.
 
+Previous limitations
+====================
+
+File renaming and linking (ABI 1)
+---------------------------------
+
+Because Landlock targets unprivileged access controls, it needs to properly
+handle composition of rules.  Such property also implies rules nesting.
+Properly handling multiple layers of rulesets, each one of them able to
+restrict access to files, also implies inheritance of the ruleset restrictions
+from a parent to its hierarchy.  Because files are identified and restricted by
+their hierarchy, moving or linking a file from one directory to another implies
+propagation of the hierarchy constraints, or restriction of these actions
+according to the potentially lost constraints.  To protect against privilege
+escalations through renaming or linking, and for the sake of simplicity,
+Landlock previously limited linking and renaming to the same directory.
+Starting with the Landlock ABI version 2, it is now possible to securely
+control renaming and linking thanks to the new `LANDLOCK_ACCESS_FS_REFER`
+access right.
+
+.. _kernel_support:
+
+Kernel support
+==============
+
+Landlock was first introduced in Linux 5.13 but it must be configured at build
+time with `CONFIG_SECURITY_LANDLOCK=y`.  Landlock must also be enabled at boot
+time as the other security modules.  The list of security modules enabled by
+default is set with `CONFIG_LSM`.  The kernel configuration should then
+contains `CONFIG_LSM=landlock,[...]` with `[...]`  as the list of other
+potentially useful security modules for the running system (see the
+`CONFIG_LSM` help).
+
+If the running kernel doesn't have `landlock` in `CONFIG_LSM`, then we can
+still enable it by adding ``lsm=landlock,[...]`` to
+Documentation/admin-guide/kernel-parameters.rst thanks to the bootloader
+configuration.
+
 Questions and answers
 =====================
 
diff --git a/Documentation/userspace-api/media/drivers/uvcvideo.rst b/Documentation/userspace-api/media/drivers/uvcvideo.rst
index e5fd8fad333c..a290f9fadae9 100644
--- a/Documentation/userspace-api/media/drivers/uvcvideo.rst
+++ b/Documentation/userspace-api/media/drivers/uvcvideo.rst
@@ -7,7 +7,7 @@ This file documents some driver-specific aspects of the UVC driver, such as
 driver-specific ioctls and implementation notes.
 
 Questions and remarks can be sent to the Linux UVC development mailing list at
-linux-uvc-devel@lists.berlios.de.
+linux-media@vger.kernel.org.
 
 
 Extension Unit (XU) support
diff --git a/Documentation/userspace-api/media/mediactl/media-controller-model.rst b/Documentation/userspace-api/media/mediactl/media-controller-model.rst
index 222cb99debb5..78bfdfb2a322 100644
--- a/Documentation/userspace-api/media/mediactl/media-controller-model.rst
+++ b/Documentation/userspace-api/media/mediactl/media-controller-model.rst
@@ -33,3 +33,9 @@ are:
 
 -  An **interface link** is a point-to-point bidirectional control
    connection between a Linux Kernel interface and an entity.
+
+- An **ancillary link** is a point-to-point connection denoting that two
+  entities form a single logical unit. For example this could represent the
+  fact that a particular camera sensor and lens controller form a single
+  physical module, meaning this lens controller drives the lens for this
+  camera sensor.
+\ No newline at end of file
diff --git a/Documentation/userspace-api/media/mediactl/media-types.rst b/Documentation/userspace-api/media/mediactl/media-types.rst
index 0a26397bd01d..0ffeece1e0c8 100644
--- a/Documentation/userspace-api/media/mediactl/media-types.rst
+++ b/Documentation/userspace-api/media/mediactl/media-types.rst
@@ -412,14 +412,21 @@ must be set for every pad.
 	  is set by drivers and is read-only for applications.
 
     *  -  ``MEDIA_LNK_FL_LINK_TYPE``
-       -  This is a bitmask that defines the type of the link. Currently,
-	  two types of links are supported:
+       -  This is a bitmask that defines the type of the link. The following
+	  link types are currently supported:
 
 	  .. _MEDIA-LNK-FL-DATA-LINK:
 
-	  ``MEDIA_LNK_FL_DATA_LINK`` if the link is between two pads
+	  ``MEDIA_LNK_FL_DATA_LINK`` for links that represent a data connection
+	  between two pads.
 
 	  .. _MEDIA-LNK-FL-INTERFACE-LINK:
 
-	  ``MEDIA_LNK_FL_INTERFACE_LINK`` if the link is between an
-	  interface and an entity
+	  ``MEDIA_LNK_FL_INTERFACE_LINK`` for links that associate an entity to its
+	  interface.
+
+	  .. _MEDIA-LNK-FL-ANCILLARY-LINK:
+
+	  ``MEDIA_LNK_FL_ANCILLARY_LINK`` for links that represent a physical
+	  relationship between two entities. The link may or may not be
+	  immutable, so applications must not assume either case.
diff --git a/Documentation/userspace-api/media/v4l/dev-decoder.rst b/Documentation/userspace-api/media/v4l/dev-decoder.rst
index 3cf2b496f2d0..675bc2c3c6b8 100644
--- a/Documentation/userspace-api/media/v4l/dev-decoder.rst
+++ b/Documentation/userspace-api/media/v4l/dev-decoder.rst
@@ -72,6 +72,12 @@ coded resolution
 coded width
    width for given coded resolution.
 
+coding tree unit
+   processing unit of the HEVC codec (corresponds to macroblock units in
+   H.264, VP8, VP9),
+   can use block structures of up to 64×64 pixels.
+   Good at sub-partitioning the picture into variable sized structures.
+
 decode order
    the order in which frames are decoded; may differ from display order if the
    coded format includes a feature of frame reordering; for decoders,
@@ -104,7 +110,8 @@ keyframe
 macroblock
    a processing unit in image and video compression formats based on linear
    block transforms (e.g. H.264, VP8, VP9); codec-specific, but for most of
-   popular codecs the size is 16x16 samples (pixels).
+   popular codecs the size is 16x16 samples (pixels). The HEVC codec uses a
+   slightly more flexible processing unit called coding tree unit (CTU).
 
 OUTPUT
    the source buffer queue; for decoders, the queue of buffers containing
diff --git a/Documentation/userspace-api/media/v4l/ext-ctrls-codec-stateless.rst b/Documentation/userspace-api/media/v4l/ext-ctrls-codec-stateless.rst
index 6541e4c32b26..bee73065e993 100644
--- a/Documentation/userspace-api/media/v4l/ext-ctrls-codec-stateless.rst
+++ b/Documentation/userspace-api/media/v4l/ext-ctrls-codec-stateless.rst
@@ -649,10 +649,16 @@ Stateless Codec Control ID
         :c:type:`timeval` in struct :c:type:`v4l2_buffer` to a __u64.
     * - __u32
       - ``pic_num``
-      -
+      - For short term references, this must match the derived value PicNum
+	(8-28) and for long term references it must match the derived value
+	LongTermPicNum (8-29). When decoding frames (as opposed to fields)
+	pic_num is the same as FrameNumWrap.
     * - __u16
       - ``frame_num``
-      -
+      - For short term references, this must match the frame_num value from
+	the slice header syntax (the driver will wrap the value if needed). For
+	long term references, this must be set to the value of
+	long_term_frame_idx described in the dec_ref_pic_marking() syntax.
     * - __u8
       - ``fields``
       - Specifies how the DPB entry is referenced. See :ref:`Reference Fields <h264_ref_fields>`
diff --git a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
index 4cd7c541fc30..6183f43f4d73 100644
--- a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
+++ b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
@@ -1180,6 +1180,28 @@ enum v4l2_mpeg_video_h264_entropy_mode -
     is set to non zero value.
     Applicable to H264, H263 and MPEG4 encoder.
 
+``V4L2_CID_MPEG_VIDEO_INTRA_REFRESH_PERIOD_TYPE (enum)``
+
+enum v4l2_mpeg_video_intra_refresh_period_type -
+    Sets the type of intra refresh. The period to refresh
+    the whole frame is specified by V4L2_CID_MPEG_VIDEO_INTRA_REFRESH_PERIOD.
+    Note that if this control is not present, then it is undefined what
+    refresh type is used and it is up to the driver to decide.
+    Applicable to H264 and HEVC encoders. Possible values are:
+
+.. tabularcolumns:: |p{9.6cm}|p{7.9cm}|
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    * - ``V4L2_MPEG_VIDEO_INTRA_REFRESH_PERIOD_TYPE_RANDOM``
+      - The whole frame is completely refreshed randomly
+        after the specified period.
+    * - ``V4L2_MPEG_VIDEO_INTRA_REFRESH_PERIOD_TYPE_CYCLIC``
+      - The whole frame MBs are completely refreshed in cyclic order
+        after the specified period.
+
 ``V4L2_CID_MPEG_VIDEO_INTRA_REFRESH_PERIOD (integer)``
     Intra macroblock refresh period. This sets the period to refresh
     the whole frame. In other words, this defines the number of frames
diff --git a/Documentation/userspace-api/media/v4l/pixfmt-reserved.rst b/Documentation/userspace-api/media/v4l/pixfmt-reserved.rst
index cabfa34b7db5..0ff68cd8cf62 100644
--- a/Documentation/userspace-api/media/v4l/pixfmt-reserved.rst
+++ b/Documentation/userspace-api/media/v4l/pixfmt-reserved.rst
@@ -239,6 +239,25 @@ please make a proposal on the linux-media mailing list.
 	It remains an opaque intermediate format and the MDP hardware must be
 	used to convert ``V4L2_PIX_FMT_MT21C`` to ``V4L2_PIX_FMT_NV12M``,
 	``V4L2_PIX_FMT_YUV420M`` or ``V4L2_PIX_FMT_YVU420``.
+    * .. _V4L2-PIX-FMT-QC08C:
+
+      - ``V4L2_PIX_FMT_QC08C``
+      - 'QC08C'
+      - Compressed Macro-tile 8-Bit YUV420 format used by Qualcomm platforms.
+        It is an opaque intermediate format. The used compression is lossless
+        and it is used by various multimedia hardware blocks like GPU, display
+        controllers, ISP and video accelerators.
+        It contains four planes for progressive video and eight planes for
+        interlaced video.
+    * .. _V4L2-PIX-FMT-QC10C:
+
+      - ``V4L2_PIX_FMT_QC10C``
+      - 'QC10C'
+      - Compressed Macro-tile 10-Bit YUV420 format used by Qualcomm platforms.
+        It is an opaque intermediate format. The used compression is lossless
+        and it is used by various multimedia hardware blocks like GPU, display
+        controllers, ISP and video accelerators.
+        It contains four planes for progressive video.
 .. raw:: latex
 
     \normalsize
diff --git a/Documentation/userspace-api/media/v4l/pixfmt-yuv-luma.rst b/Documentation/userspace-api/media/v4l/pixfmt-yuv-luma.rst
index 8ebd58c3588f..6a387f9df3ba 100644
--- a/Documentation/userspace-api/media/v4l/pixfmt-yuv-luma.rst
+++ b/Documentation/userspace-api/media/v4l/pixfmt-yuv-luma.rst
@@ -48,6 +48,17 @@ are often referred to as greyscale formats.
       - ...
       - ...
 
+    * .. _V4L2-PIX-FMT-IPU3-Y10:
+
+      - ``V4L2_PIX_FMT_IPU3_Y10``
+      - 'ip3y'
+
+      - Y'\ :sub:`0`\ [7:0]
+      - Y'\ :sub:`1`\ [5:0] Y'\ :sub:`0`\ [9:8]
+      - Y'\ :sub:`2`\ [3:0] Y'\ :sub:`1`\ [9:6]
+      - Y'\ :sub:`3`\ [1:0] Y'\ :sub:`2`\ [9:4]
+      - Y'\ :sub:`3`\ [9:2]
+
     * .. _V4L2-PIX-FMT-Y10:
 
       - ``V4L2_PIX_FMT_Y10``
@@ -133,4 +144,5 @@ are often referred to as greyscale formats.
 
     For the Y16 and Y16_BE formats, the actual sampling precision may be lower
     than 16 bits. For example, 10 bits per pixel uses values in the range 0 to
-    1023.
+    1023. For the IPU3_Y10 format 25 pixels are packed into 32 bytes, which
+    leaves the 6 most significant bits of the last byte padded with 0.
diff --git a/Documentation/userspace-api/media/v4l/vidioc-streamon.rst b/Documentation/userspace-api/media/v4l/vidioc-streamon.rst
index 0bc86f06947b..1a79313a29fa 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-streamon.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-streamon.rst
@@ -43,8 +43,7 @@ the capture or output process during streaming
 Capture hardware is disabled and no input buffers are filled (if there
 are any empty buffers in the incoming queue) until ``VIDIOC_STREAMON``
 has been called. Output hardware is disabled and no video signal is
-produced until ``VIDIOC_STREAMON`` has been called. The ioctl will
-succeed when at least one output buffer is in the incoming queue.
+produced until ``VIDIOC_STREAMON`` has been called.
 
 Memory-to-memory devices will not start until ``VIDIOC_STREAMON`` has
 been called for both the capture and output stream types.
diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst
index 539e9d4a4860..d1e2b9193f09 100644
--- a/Documentation/userspace-api/seccomp_filter.rst
+++ b/Documentation/userspace-api/seccomp_filter.rst
@@ -271,6 +271,16 @@ notifying process it will be replaced. The supervisor can also add an FD, and
 respond atomically by using the ``SECCOMP_ADDFD_FLAG_SEND`` flag and the return
 value will be the injected file descriptor number.
 
+The notifying process can be preempted, resulting in the notification being
+aborted. This can be problematic when trying to take actions on behalf of the
+notifying process that are long-running and typically retryable (mounting a
+filesytem). Alternatively, at filter installation time, the
+``SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV`` flag can be set. This flag makes it
+such that when a user notification is received by the supervisor, the notifying
+process will ignore non-fatal signals until the response is sent. Signals that
+are sent prior to the notification being received by userspace are handled
+normally.
+
 It is worth noting that ``struct seccomp_data`` contains the values of register
 arguments to the syscall, but does not contain pointers to memory. The task's
 memory is accessible to suitably privileged traces via ``ptrace()`` or
diff --git a/Documentation/virt/coco/sev-guest.rst b/Documentation/virt/coco/sev-guest.rst
new file mode 100644
index 000000000000..bf593e88cfd9
--- /dev/null
+++ b/Documentation/virt/coco/sev-guest.rst
@@ -0,0 +1,155 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================================================
+The Definitive SEV Guest API Documentation
+===================================================================
+
+1. General description
+======================
+
+The SEV API is a set of ioctls that are used by the guest or hypervisor
+to get or set a certain aspect of the SEV virtual machine. The ioctls belong
+to the following classes:
+
+ - Hypervisor ioctls: These query and set global attributes which affect the
+   whole SEV firmware.  These ioctl are used by platform provisioning tools.
+
+ - Guest ioctls: These query and set attributes of the SEV virtual machine.
+
+2. API description
+==================
+
+This section describes ioctls that is used for querying the SEV guest report
+from the SEV firmware. For each ioctl, the following information is provided
+along with a description:
+
+  Technology:
+      which SEV technology provides this ioctl. SEV, SEV-ES, SEV-SNP or all.
+
+  Type:
+      hypervisor or guest. The ioctl can be used inside the guest or the
+      hypervisor.
+
+  Parameters:
+      what parameters are accepted by the ioctl.
+
+  Returns:
+      the return value.  General error numbers (-ENOMEM, -EINVAL)
+      are not detailed, but errors with specific meanings are.
+
+The guest ioctl should be issued on a file descriptor of the /dev/sev-guest device.
+The ioctl accepts struct snp_user_guest_request. The input and output structure is
+specified through the req_data and resp_data field respectively. If the ioctl fails
+to execute due to a firmware error, then fw_err code will be set otherwise the
+fw_err will be set to 0x00000000000000ff.
+
+The firmware checks that the message sequence counter is one greater than
+the guests message sequence counter. If guest driver fails to increment message
+counter (e.g. counter overflow), then -EIO will be returned.
+
+::
+
+        struct snp_guest_request_ioctl {
+                /* Message version number */
+                __u32 msg_version;
+
+                /* Request and response structure address */
+                __u64 req_data;
+                __u64 resp_data;
+
+                /* firmware error code on failure (see psp-sev.h) */
+                __u64 fw_err;
+        };
+
+2.1 SNP_GET_REPORT
+------------------
+
+:Technology: sev-snp
+:Type: guest ioctl
+:Parameters (in): struct snp_report_req
+:Returns (out): struct snp_report_resp on success, -negative on error
+
+The SNP_GET_REPORT ioctl can be used to query the attestation report from the
+SEV-SNP firmware. The ioctl uses the SNP_GUEST_REQUEST (MSG_REPORT_REQ) command
+provided by the SEV-SNP firmware to query the attestation report.
+
+On success, the snp_report_resp.data will contains the report. The report
+contain the format described in the SEV-SNP specification. See the SEV-SNP
+specification for further details.
+
+2.2 SNP_GET_DERIVED_KEY
+-----------------------
+:Technology: sev-snp
+:Type: guest ioctl
+:Parameters (in): struct snp_derived_key_req
+:Returns (out): struct snp_derived_key_resp on success, -negative on error
+
+The SNP_GET_DERIVED_KEY ioctl can be used to get a key derive from a root key.
+The derived key can be used by the guest for any purpose, such as sealing keys
+or communicating with external entities.
+
+The ioctl uses the SNP_GUEST_REQUEST (MSG_KEY_REQ) command provided by the
+SEV-SNP firmware to derive the key. See SEV-SNP specification for further details
+on the various fields passed in the key derivation request.
+
+On success, the snp_derived_key_resp.data contains the derived key value. See
+the SEV-SNP specification for further details.
+
+
+2.3 SNP_GET_EXT_REPORT
+----------------------
+:Technology: sev-snp
+:Type: guest ioctl
+:Parameters (in/out): struct snp_ext_report_req
+:Returns (out): struct snp_report_resp on success, -negative on error
+
+The SNP_GET_EXT_REPORT ioctl is similar to the SNP_GET_REPORT. The difference is
+related to the additional certificate data that is returned with the report.
+The certificate data returned is being provided by the hypervisor through the
+SNP_SET_EXT_CONFIG.
+
+The ioctl uses the SNP_GUEST_REQUEST (MSG_REPORT_REQ) command provided by the SEV-SNP
+firmware to get the attestation report.
+
+On success, the snp_ext_report_resp.data will contain the attestation report
+and snp_ext_report_req.certs_address will contain the certificate blob. If the
+length of the blob is smaller than expected then snp_ext_report_req.certs_len will
+be updated with the expected value.
+
+See GHCB specification for further detail on how to parse the certificate blob.
+
+3. SEV-SNP CPUID Enforcement
+============================
+
+SEV-SNP guests can access a special page that contains a table of CPUID values
+that have been validated by the PSP as part of the SNP_LAUNCH_UPDATE firmware
+command. It provides the following assurances regarding the validity of CPUID
+values:
+
+ - Its address is obtained via bootloader/firmware (via CC blob), and those
+   binaries will be measured as part of the SEV-SNP attestation report.
+ - Its initial state will be encrypted/pvalidated, so attempts to modify
+   it during run-time will result in garbage being written, or #VC exceptions
+   being generated due to changes in validation state if the hypervisor tries
+   to swap the backing page.
+ - Attempts to bypass PSP checks by the hypervisor by using a normal page, or
+   a non-CPUID encrypted page will change the measurement provided by the
+   SEV-SNP attestation report.
+ - The CPUID page contents are *not* measured, but attempts to modify the
+   expected contents of a CPUID page as part of guest initialization will be
+   gated by the PSP CPUID enforcement policy checks performed on the page
+   during SNP_LAUNCH_UPDATE, and noticeable later if the guest owner
+   implements their own checks of the CPUID values.
+
+It is important to note that this last assurance is only useful if the kernel
+has taken care to make use of the SEV-SNP CPUID throughout all stages of boot.
+Otherwise, guest owner attestation provides no assurance that the kernel wasn't
+fed incorrect values at some point during boot.
+
+
+Reference
+---------
+
+SEV-SNP and GHCB specification: developer.amd.com/sev
+
+The driver is based on SEV-SNP firmware spec 0.9 and GHCB spec version 2.0.
diff --git a/Documentation/virt/index.rst b/Documentation/virt/index.rst
index edea7fea95a8..492f0920b988 100644
--- a/Documentation/virt/index.rst
+++ b/Documentation/virt/index.rst
@@ -13,6 +13,7 @@ Linux Virtualization Support
    guest-halt-polling
    ne_overview
    acrn/index
+   coco/sev-guest
 
 .. only:: html and subproject
 
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 4a900cdbc62e..c8e2e9cd84dc 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -5713,6 +5713,8 @@ affect the device's behavior. Current defined flags::
   #define KVM_RUN_X86_SMM     (1 << 0)
   /* x86, set if bus lock detected in VM */
   #define KVM_RUN_BUS_LOCK    (1 << 1)
+  /* arm64, set for KVM_EXIT_DEBUG */
+  #define KVM_DEBUG_ARCH_HSR_HIGH_VALID  (1 << 0)
 
 ::
 
diff --git a/Documentation/vm/arch_pgtable_helpers.rst b/Documentation/vm/arch_pgtable_helpers.rst
index f8b225fc9190..cbaee9e59241 100644
--- a/Documentation/vm/arch_pgtable_helpers.rst
+++ b/Documentation/vm/arch_pgtable_helpers.rst
@@ -13,7 +13,7 @@ Following tables describe the expected semantics which can also be tested during
 boot via CONFIG_DEBUG_VM_PGTABLE option. All future changes in here or the debug
 test need to be in sync.
 
-======================
+
 PTE Page Table Helpers
 ======================
 
@@ -79,7 +79,7 @@ PTE Page Table Helpers
 | ptep_set_access_flags     | Converts into a more permissive PTE              |
 +---------------------------+--------------------------------------------------+
 
-======================
+
 PMD Page Table Helpers
 ======================
 
@@ -153,7 +153,7 @@ PMD Page Table Helpers
 | pmdp_set_access_flags     | Converts into a more permissive PMD              |
 +---------------------------+--------------------------------------------------+
 
-======================
+
 PUD Page Table Helpers
 ======================
 
@@ -209,7 +209,7 @@ PUD Page Table Helpers
 | pudp_set_access_flags     | Converts into a more permissive PUD              |
 +---------------------------+--------------------------------------------------+
 
-==========================
+
 HugeTLB Page Table Helpers
 ==========================
 
@@ -235,7 +235,7 @@ HugeTLB Page Table Helpers
 | huge_ptep_set_access_flags  | Converts into a more permissive HugeTLB        |
 +---------------------------+--------------------------------------------------+
 
-========================
+
 SWAP Page Table Helpers
 ========================
 
diff --git a/Documentation/vm/bootmem.rst b/Documentation/vm/bootmem.rst
new file mode 100644
index 000000000000..eb2b31eedfa1
--- /dev/null
+++ b/Documentation/vm/bootmem.rst
@@ -0,0 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========
+Boot Memory
+===========
diff --git a/Documentation/vm/index.rst b/Documentation/vm/index.rst
index 44365c4574a3..e72736d53604 100644
--- a/Documentation/vm/index.rst
+++ b/Documentation/vm/index.rst
@@ -2,12 +2,39 @@
 Linux Memory Management Documentation
 =====================================
 
-This is a collection of documents about the Linux memory management (mm)
-subsystem internals with different level of details ranging from notes and
-mailing list responses for elaborating descriptions of data structures and
-algorithms.  If you are looking for advice on simply allocating memory, see the
-:ref:`memory_allocation`.  For controlling and tuning guides, see the
-:doc:`admin guide <../admin-guide/mm/index>`.
+Memory Management Guide
+=======================
+
+This is a guide to understanding the memory management subsystem
+of Linux.  If you are looking for advice on simply allocating memory,
+see the :ref:`memory_allocation`.  For controlling and tuning guides,
+see the :doc:`admin guide <../admin-guide/mm/index>`.
+
+.. toctree::
+   :maxdepth: 1
+
+   physical_memory
+   page_tables
+   process_addrs
+   bootmem
+   page_allocation
+   vmalloc
+   slab
+   highmem
+   page_reclaim
+   swap
+   page_cache
+   shmfs
+   oom
+
+Legacy Documentation
+====================
+
+This is a collection of older documents about the Linux memory management
+(MM) subsystem internals with different level of details ranging from
+notes and mailing list responses for elaborating descriptions of data
+structures and algorithms.  It should all be integrated nicely into the
+above structured documentation, or deleted if it has served its purpose.
 
 .. toctree::
    :maxdepth: 1
@@ -18,7 +45,6 @@ algorithms.  If you are looking for advice on simply allocating memory, see the
    damon/index
    free_page_reporting
    frontswap
-   highmem
    hmm
    hwpoison
    hugetlbfs_reserv
diff --git a/Documentation/vm/oom.rst b/Documentation/vm/oom.rst
new file mode 100644
index 000000000000..18e9e40c1ec1
--- /dev/null
+++ b/Documentation/vm/oom.rst
@@ -0,0 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================
+Out Of Memory Handling
+======================
diff --git a/Documentation/vm/page_allocation.rst b/Documentation/vm/page_allocation.rst
new file mode 100644
index 000000000000..d9b4495561f1
--- /dev/null
+++ b/Documentation/vm/page_allocation.rst
@@ -0,0 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+Page Allocation
+===============
diff --git a/Documentation/vm/page_cache.rst b/Documentation/vm/page_cache.rst
new file mode 100644
index 000000000000..75eba7c431b2
--- /dev/null
+++ b/Documentation/vm/page_cache.rst
@@ -0,0 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========
+Page Cache
+==========
diff --git a/Documentation/vm/page_reclaim.rst b/Documentation/vm/page_reclaim.rst
new file mode 100644
index 000000000000..50a30b7f8ac3
--- /dev/null
+++ b/Documentation/vm/page_reclaim.rst
@@ -0,0 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============
+Page Reclaim
+============
diff --git a/Documentation/vm/page_tables.rst b/Documentation/vm/page_tables.rst
new file mode 100644
index 000000000000..96939571d7bc
--- /dev/null
+++ b/Documentation/vm/page_tables.rst
@@ -0,0 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========
+Page Tables
+===========
diff --git a/Documentation/vm/physical_memory.rst b/Documentation/vm/physical_memory.rst
new file mode 100644
index 000000000000..2ab7b8c1c863
--- /dev/null
+++ b/Documentation/vm/physical_memory.rst
@@ -0,0 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+Physical Memory
+===============
diff --git a/Documentation/vm/process_addrs.rst b/Documentation/vm/process_addrs.rst
new file mode 100644
index 000000000000..e8618fbc62c9
--- /dev/null
+++ b/Documentation/vm/process_addrs.rst
@@ -0,0 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================
+Process Addresses
+=================
diff --git a/Documentation/vm/shmfs.rst b/Documentation/vm/shmfs.rst
new file mode 100644
index 000000000000..8b01ebb4c30e
--- /dev/null
+++ b/Documentation/vm/shmfs.rst
@@ -0,0 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========================
+Shared Memory Filesystem
+========================
diff --git a/Documentation/vm/slab.rst b/Documentation/vm/slab.rst
new file mode 100644
index 000000000000..87d5a5bb172f
--- /dev/null
+++ b/Documentation/vm/slab.rst
@@ -0,0 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+Slab Allocation
+===============
diff --git a/Documentation/vm/slub.rst b/Documentation/vm/slub.rst
index d3028554b1e9..43063ade737a 100644
--- a/Documentation/vm/slub.rst
+++ b/Documentation/vm/slub.rst
@@ -384,5 +384,69 @@ c) Execute ``slabinfo-gnuplot.sh`` in '-t' mode, passing all of the
       40,60`` range will plot only samples collected between 40th and
       60th seconds).
 
+
+DebugFS files for SLUB
+======================
+
+For more information about current state of SLUB caches with the user tracking
+debug option enabled, debugfs files are available, typically under
+/sys/kernel/debug/slab/<cache>/ (created only for caches with enabled user
+tracking). There are 2 types of these files with the following debug
+information:
+
+1. alloc_traces::
+
+    Prints information about unique allocation traces of the currently
+    allocated objects. The output is sorted by frequency of each trace.
+
+    Information in the output:
+    Number of objects, allocating function, minimal/average/maximal jiffies since alloc,
+    pid range of the allocating processes, cpu mask of allocating cpus, and stack trace.
+
+    Example:::
+
+    1085 populate_error_injection_list+0x97/0x110 age=166678/166680/166682 pid=1 cpus=1::
+	__slab_alloc+0x6d/0x90
+	kmem_cache_alloc_trace+0x2eb/0x300
+	populate_error_injection_list+0x97/0x110
+	init_error_injection+0x1b/0x71
+	do_one_initcall+0x5f/0x2d0
+	kernel_init_freeable+0x26f/0x2d7
+	kernel_init+0xe/0x118
+	ret_from_fork+0x22/0x30
+
+
+2. free_traces::
+
+    Prints information about unique freeing traces of the currently allocated
+    objects. The freeing traces thus come from the previous life-cycle of the
+    objects and are reported as not available for objects allocated for the first
+    time. The output is sorted by frequency of each trace.
+
+    Information in the output:
+    Number of objects, freeing function, minimal/average/maximal jiffies since free,
+    pid range of the freeing processes, cpu mask of freeing cpus, and stack trace.
+
+    Example:::
+
+    1980 <not-available> age=4294912290 pid=0 cpus=0
+    51 acpi_ut_update_ref_count+0x6a6/0x782 age=236886/237027/237772 pid=1 cpus=1
+	kfree+0x2db/0x420
+	acpi_ut_update_ref_count+0x6a6/0x782
+	acpi_ut_update_object_reference+0x1ad/0x234
+	acpi_ut_remove_reference+0x7d/0x84
+	acpi_rs_get_prt_method_data+0x97/0xd6
+	acpi_get_irq_routing_table+0x82/0xc4
+	acpi_pci_irq_find_prt_entry+0x8e/0x2e0
+	acpi_pci_irq_lookup+0x3a/0x1e0
+	acpi_pci_irq_enable+0x77/0x240
+	pcibios_enable_device+0x39/0x40
+	do_pci_enable_device.part.0+0x5d/0xe0
+	pci_enable_device_flags+0xfc/0x120
+	pci_enable_device+0x13/0x20
+	virtio_pci_probe+0x9e/0x170
+	local_pci_probe+0x48/0x80
+	pci_device_probe+0x105/0x1c0
+
 Christoph Lameter, May 30, 2007
 Sergey Senozhatsky, October 23, 2015
diff --git a/Documentation/vm/swap.rst b/Documentation/vm/swap.rst
new file mode 100644
index 000000000000..78819bd4d745
--- /dev/null
+++ b/Documentation/vm/swap.rst
@@ -0,0 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====
+Swap
+====
diff --git a/Documentation/vm/vmalloc.rst b/Documentation/vm/vmalloc.rst
new file mode 100644
index 000000000000..363fe20d6b9f
--- /dev/null
+++ b/Documentation/vm/vmalloc.rst
@@ -0,0 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================================
+Virtually Contiguous Memory Allocation
+======================================
diff --git a/Documentation/w1/slaves/w1_therm.rst b/Documentation/w1/slaves/w1_therm.rst
index c3c9ed7a356c..758dadba84f6 100644
--- a/Documentation/w1/slaves/w1_therm.rst
+++ b/Documentation/w1/slaves/w1_therm.rst
@@ -6,7 +6,8 @@ Supported chips:
 
   * Maxim ds18*20 based temperature sensors.
   * Maxim ds1825 based temperature sensors.
-  * GXCAS GC20MH01 temperature sensor.
+  * GXCAS GX20MH01 temperature sensor.
+  * Maxim MAX31850 thermoelement interface.
 
 Author: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
 
@@ -15,7 +16,7 @@ Description
 -----------
 
 w1_therm provides basic temperature conversion for ds18*20, ds28ea00, GX20MH01
-devices.
+and MAX31850 devices.
 
 Supported family codes:
 
@@ -137,3 +138,7 @@ bits in Config register; R2 bit in Config register enabling 13 and 14 bit
 resolutions. The device is powered up in 14-bit resolution mode. The conversion
 times specified in the datasheet are too low and have to be increased. The
 device supports driver features ``1`` and ``2``.
+
+MAX31850 device shares family number 0x3B with DS1825. The device is generally
+compatible with DS1825. The higher 4 bits of Config register read all 1,
+indicating 15, but the device is always operating in 14-bit resolution mode.
diff --git a/Documentation/x86/cpuinfo.rst b/Documentation/x86/cpuinfo.rst
index 5d54c39a063f..08246e8ac835 100644
--- a/Documentation/x86/cpuinfo.rst
+++ b/Documentation/x86/cpuinfo.rst
@@ -140,9 +140,8 @@ from #define X86_FEATURE_UMIP (16*32 + 2).
 
 In addition, there exists a variety of custom command-line parameters that
 disable specific features. The list of parameters includes, but is not limited
-to, nofsgsbase, nosmap, and nosmep. 5-level paging can also be disabled using
-"no5lvl". SMAP and SMEP are disabled with the aforementioned parameters,
-respectively.
+to, nofsgsbase, nosgx, noxsave, etc. 5-level paging can also be disabled using
+"no5lvl".
 
 e: The feature was known to be non-functional.
 ----------------------------------------------
diff --git a/Documentation/x86/exception-tables.rst b/Documentation/x86/exception-tables.rst
index de58110c5ffd..efde1fef4fbd 100644
--- a/Documentation/x86/exception-tables.rst
+++ b/Documentation/x86/exception-tables.rst
@@ -32,14 +32,14 @@ Whenever the kernel tries to access an address that is currently not
 accessible, the CPU generates a page fault exception and calls the
 page fault handler::
 
-  void do_page_fault(struct pt_regs *regs, unsigned long error_code)
+  void exc_page_fault(struct pt_regs *regs, unsigned long error_code)
 
 in arch/x86/mm/fault.c. The parameters on the stack are set up by
 the low level assembly glue in arch/x86/entry/entry_32.S. The parameter
 regs is a pointer to the saved registers on the stack, error_code
 contains a reason code for the exception.
 
-do_page_fault first obtains the unaccessible address from the CPU
+exc_page_fault() first obtains the inaccessible address from the CPU
 control register CR2. If the address is within the virtual address
 space of the process, the fault probably occurred, because the page
 was not swapped in, write protected or something similar. However,
@@ -57,10 +57,10 @@ Where does fixup point to?
 
 Since we jump to the contents of fixup, fixup obviously points
 to executable code. This code is hidden inside the user access macros.
-I have picked the get_user macro defined in arch/x86/include/asm/uaccess.h
+I have picked the get_user() macro defined in arch/x86/include/asm/uaccess.h
 as an example. The definition is somewhat hard to follow, so let's peek at
 the code generated by the preprocessor and the compiler. I selected
-the get_user call in drivers/char/sysrq.c for a detailed examination.
+the get_user() call in drivers/char/sysrq.c for a detailed examination.
 
 The original code in sysrq.c line 587::
 
@@ -281,12 +281,15 @@ vma occurs?
 
     > c017e7a5 <do_con_write+e1> movb   (%ebx),%dl
 #. MMU generates exception
-#. CPU calls do_page_fault
-#. do page fault calls search_exception_table (regs->eip == c017e7a5);
-#. search_exception_table looks up the address c017e7a5 in the
+#. CPU calls exc_page_fault()
+#. exc_page_fault() calls do_user_addr_fault()
+#. do_user_addr_fault() calls kernelmode_fixup_or_oops()
+#. kernelmode_fixup_or_oops() calls fixup_exception() (regs->eip == c017e7a5);
+#. fixup_exception() calls search_exception_tables()
+#. search_exception_tables() looks up the address c017e7a5 in the
    exception table (i.e. the contents of the ELF section __ex_table)
    and returns the address of the associated fault handle code c0199ff5.
-#. do_page_fault modifies its own return address to point to the fault
+#. fixup_exception() modifies its own return address to point to the fault
    handle code and returns.
 #. execution continues in the fault handling code.
 #. a) EAX becomes -EFAULT (== -14)
@@ -298,9 +301,9 @@ The steps 8a to 8c in a certain way emulate the faulting instruction.
 
 That's it, mostly. If you look at our example, you might ask why
 we set EAX to -EFAULT in the exception handler code. Well, the
-get_user macro actually returns a value: 0, if the user access was
+get_user() macro actually returns a value: 0, if the user access was
 successful, -EFAULT on failure. Our original code did not test this
-return value, however the inline assembly code in get_user tries to
+return value, however the inline assembly code in get_user() tries to
 return -EFAULT. GCC selected EAX to return this value.
 
 NOTE:
diff --git a/Documentation/x86/ifs.rst b/Documentation/x86/ifs.rst
new file mode 100644
index 000000000000..97abb696a680
--- /dev/null
+++ b/Documentation/x86/ifs.rst
@@ -0,0 +1,2 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. kernel-doc:: drivers/platform/x86/intel/ifs/ifs.h
diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst
index 91b2fa456618..c73d133fd37c 100644
--- a/Documentation/x86/index.rst
+++ b/Documentation/x86/index.rst
@@ -22,10 +22,11 @@ x86-specific Documentation
    mtrr
    pat
    intel-hfi
-   intel-iommu
+   iommu
    intel_txt
    amd-memory-encryption
    amd_hsmp
+   tdx
    pti
    mds
    microcode
@@ -35,6 +36,7 @@ x86-specific Documentation
    usb-legacy-support
    i386/index
    x86_64/index
+   ifs
    sva
    sgx
    features
diff --git a/Documentation/x86/intel-iommu.rst b/Documentation/x86/intel-iommu.rst
deleted file mode 100644
index 099f13d51d5f..000000000000
--- a/Documentation/x86/intel-iommu.rst
+++ /dev/null
@@ -1,115 +0,0 @@
-===================
-Linux IOMMU Support
-===================
-
-The architecture spec can be obtained from the below location.
-
-http://www.intel.com/content/dam/www/public/us/en/documents/product-specifications/vt-directed-io-spec.pdf
-
-This guide gives a quick cheat sheet for some basic understanding.
-
-Some Keywords
-
-- DMAR - DMA remapping
-- DRHD - DMA Remapping Hardware Unit Definition
-- RMRR - Reserved memory Region Reporting Structure
-- ZLR  - Zero length reads from PCI devices
-- IOVA - IO Virtual address.
-
-Basic stuff
------------
-
-ACPI enumerates and lists the different DMA engines in the platform, and
-device scope relationships between PCI devices and which DMA engine  controls
-them.
-
-What is RMRR?
--------------
-
-There are some devices the BIOS controls, for e.g USB devices to perform
-PS2 emulation. The regions of memory used for these devices are marked
-reserved in the e820 map. When we turn on DMA translation, DMA to those
-regions will fail. Hence BIOS uses RMRR to specify these regions along with
-devices that need to access these regions. OS is expected to setup
-unity mappings for these regions for these devices to access these regions.
-
-How is IOVA generated?
-----------------------
-
-Well behaved drivers call pci_map_*() calls before sending command to device
-that needs to perform DMA. Once DMA is completed and mapping is no longer
-required, device performs a pci_unmap_*() calls to unmap the region.
-
-The Intel IOMMU driver allocates a virtual address per domain. Each PCIE
-device has its own domain (hence protection). Devices under p2p bridges
-share the virtual address with all devices under the p2p bridge due to
-transaction id aliasing for p2p bridges.
-
-IOVA generation is pretty generic. We used the same technique as vmalloc()
-but these are not global address spaces, but separate for each domain.
-Different DMA engines may support different number of domains.
-
-We also allocate guard pages with each mapping, so we can attempt to catch
-any overflow that might happen.
-
-
-Graphics Problems?
-------------------
-If you encounter issues with graphics devices, you can try adding
-option intel_iommu=igfx_off to turn off the integrated graphics engine.
-If this fixes anything, please ensure you file a bug reporting the problem.
-
-Some exceptions to IOVA
------------------------
-Interrupt ranges are not address translated, (0xfee00000 - 0xfeefffff).
-The same is true for peer to peer transactions. Hence we reserve the
-address from PCI MMIO ranges so they are not allocated for IOVA addresses.
-
-
-Fault reporting
----------------
-When errors are reported, the DMA engine signals via an interrupt. The fault
-reason and device that caused it with fault reason is printed on console.
-
-See below for sample.
-
-
-Boot Message Sample
--------------------
-
-Something like this gets printed indicating presence of DMAR tables
-in ACPI.
-
-ACPI: DMAR (v001 A M I  OEMDMAR  0x00000001 MSFT 0x00000097) @ 0x000000007f5b5ef0
-
-When DMAR is being processed and initialized by ACPI, prints DMAR locations
-and any RMRR's processed::
-
-	ACPI DMAR:Host address width 36
-	ACPI DMAR:DRHD (flags: 0x00000000)base: 0x00000000fed90000
-	ACPI DMAR:DRHD (flags: 0x00000000)base: 0x00000000fed91000
-	ACPI DMAR:DRHD (flags: 0x00000001)base: 0x00000000fed93000
-	ACPI DMAR:RMRR base: 0x00000000000ed000 end: 0x00000000000effff
-	ACPI DMAR:RMRR base: 0x000000007f600000 end: 0x000000007fffffff
-
-When DMAR is enabled for use, you will notice..
-
-PCI-DMA: Using DMAR IOMMU
--------------------------
-
-Fault reporting
-^^^^^^^^^^^^^^^
-
-::
-
-	DMAR:[DMA Write] Request device [00:02.0] fault addr 6df084000
-	DMAR:[fault reason 05] PTE Write access is not set
-	DMAR:[DMA Write] Request device [00:02.0] fault addr 6df084000
-	DMAR:[fault reason 05] PTE Write access is not set
-
-TBD
-----
-
-- For compatibility testing, could use unity map domain for all devices, just
-  provide a 1-1 for all useful memory under a single domain for all devices.
-- API for paravirt ops for abstracting functionality for VMM folks.
diff --git a/Documentation/x86/iommu.rst b/Documentation/x86/iommu.rst
new file mode 100644
index 000000000000..42c7a6faa39a
--- /dev/null
+++ b/Documentation/x86/iommu.rst
@@ -0,0 +1,151 @@
+=================
+x86 IOMMU Support
+=================
+
+The architecture specs can be obtained from the below locations.
+
+- Intel: http://www.intel.com/content/dam/www/public/us/en/documents/product-specifications/vt-directed-io-spec.pdf
+- AMD: https://www.amd.com/system/files/TechDocs/48882_IOMMU.pdf
+
+This guide gives a quick cheat sheet for some basic understanding.
+
+Basic stuff
+-----------
+
+ACPI enumerates and lists the different IOMMUs on the platform, and
+device scope relationships between devices and which IOMMU controls
+them.
+
+Some ACPI Keywords:
+
+- DMAR - Intel DMA Remapping table
+- DRHD - Intel DMA Remapping Hardware Unit Definition
+- RMRR - Intel Reserved Memory Region Reporting Structure
+- IVRS - AMD I/O Virtualization Reporting Structure
+- IVDB - AMD I/O Virtualization Definition Block
+- IVHD - AMD I/O Virtualization Hardware Definition
+
+What is Intel RMRR?
+^^^^^^^^^^^^^^^^^^^
+
+There are some devices the BIOS controls, for e.g USB devices to perform
+PS2 emulation. The regions of memory used for these devices are marked
+reserved in the e820 map. When we turn on DMA translation, DMA to those
+regions will fail. Hence BIOS uses RMRR to specify these regions along with
+devices that need to access these regions. OS is expected to setup
+unity mappings for these regions for these devices to access these regions.
+
+What is AMD IVRS?
+^^^^^^^^^^^^^^^^^
+
+The architecture defines an ACPI-compatible data structure called an I/O
+Virtualization Reporting Structure (IVRS) that is used to convey information
+related to I/O virtualization to system software.  The IVRS describes the
+configuration and capabilities of the IOMMUs contained in the platform as
+well as information about the devices that each IOMMU virtualizes.
+
+The IVRS provides information about the following:
+
+- IOMMUs present in the platform including their capabilities and proper configuration
+- System I/O topology relevant to each IOMMU
+- Peripheral devices that cannot be otherwise enumerated
+- Memory regions used by SMI/SMM, platform firmware, and platform hardware. These are generally exclusion ranges to be configured by system software.
+
+How is an I/O Virtual Address (IOVA) generated?
+-----------------------------------------------
+
+Well behaved drivers call dma_map_*() calls before sending command to device
+that needs to perform DMA. Once DMA is completed and mapping is no longer
+required, driver performs dma_unmap_*() calls to unmap the region.
+
+Intel Specific Notes
+--------------------
+
+Graphics Problems?
+^^^^^^^^^^^^^^^^^^
+
+If you encounter issues with graphics devices, you can try adding
+option intel_iommu=igfx_off to turn off the integrated graphics engine.
+If this fixes anything, please ensure you file a bug reporting the problem.
+
+Some exceptions to IOVA
+^^^^^^^^^^^^^^^^^^^^^^^
+
+Interrupt ranges are not address translated, (0xfee00000 - 0xfeefffff).
+The same is true for peer to peer transactions. Hence we reserve the
+address from PCI MMIO ranges so they are not allocated for IOVA addresses.
+
+AMD Specific Notes
+------------------
+
+Graphics Problems?
+^^^^^^^^^^^^^^^^^^
+
+If you encounter issues with integrated graphics devices, you can try adding
+option iommu=pt to the kernel command line use a 1:1 mapping for the IOMMU.  If
+this fixes anything, please ensure you file a bug reporting the problem.
+
+Fault reporting
+---------------
+When errors are reported, the IOMMU signals via an interrupt. The fault
+reason and device that caused it is printed on the console.
+
+
+Kernel Log Samples
+------------------
+
+Intel Boot Messages
+^^^^^^^^^^^^^^^^^^^
+
+Something like this gets printed indicating presence of DMAR tables
+in ACPI:
+
+::
+
+	ACPI: DMAR (v001 A M I  OEMDMAR  0x00000001 MSFT 0x00000097) @ 0x000000007f5b5ef0
+
+When DMAR is being processed and initialized by ACPI, prints DMAR locations
+and any RMRR's processed:
+
+::
+
+	ACPI DMAR:Host address width 36
+	ACPI DMAR:DRHD (flags: 0x00000000)base: 0x00000000fed90000
+	ACPI DMAR:DRHD (flags: 0x00000000)base: 0x00000000fed91000
+	ACPI DMAR:DRHD (flags: 0x00000001)base: 0x00000000fed93000
+	ACPI DMAR:RMRR base: 0x00000000000ed000 end: 0x00000000000effff
+	ACPI DMAR:RMRR base: 0x000000007f600000 end: 0x000000007fffffff
+
+When DMAR is enabled for use, you will notice:
+
+::
+
+	PCI-DMA: Using DMAR IOMMU
+
+Intel Fault reporting
+^^^^^^^^^^^^^^^^^^^^^
+
+::
+
+	DMAR:[DMA Write] Request device [00:02.0] fault addr 6df084000
+	DMAR:[fault reason 05] PTE Write access is not set
+	DMAR:[DMA Write] Request device [00:02.0] fault addr 6df084000
+	DMAR:[fault reason 05] PTE Write access is not set
+
+AMD Boot Messages
+^^^^^^^^^^^^^^^^^
+
+Something like this gets printed indicating presence of the IOMMU:
+
+::
+
+	iommu: Default domain type: Translated
+	iommu: DMA domain TLB invalidation policy: lazy mode
+
+AMD Fault reporting
+^^^^^^^^^^^^^^^^^^^
+
+::
+
+	AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0007 address=0xffffc02000 flags=0x0000]
+	AMD-Vi: Event logged [IO_PAGE_FAULT device=07:00.0 domain=0x0007 address=0xffffc02000 flags=0x0000]
diff --git a/Documentation/x86/tdx.rst b/Documentation/x86/tdx.rst
new file mode 100644
index 000000000000..b8fa4329e1a5
--- /dev/null
+++ b/Documentation/x86/tdx.rst
@@ -0,0 +1,218 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================================
+Intel Trust Domain Extensions (TDX)
+=====================================
+
+Intel's Trust Domain Extensions (TDX) protect confidential guest VMs from
+the host and physical attacks by isolating the guest register state and by
+encrypting the guest memory. In TDX, a special module running in a special
+mode sits between the host and the guest and manages the guest/host
+separation.
+
+Since the host cannot directly access guest registers or memory, much
+normal functionality of a hypervisor must be moved into the guest. This is
+implemented using a Virtualization Exception (#VE) that is handled by the
+guest kernel. A #VE is handled entirely inside the guest kernel, but some
+require the hypervisor to be consulted.
+
+TDX includes new hypercall-like mechanisms for communicating from the
+guest to the hypervisor or the TDX module.
+
+New TDX Exceptions
+==================
+
+TDX guests behave differently from bare-metal and traditional VMX guests.
+In TDX guests, otherwise normal instructions or memory accesses can cause
+#VE or #GP exceptions.
+
+Instructions marked with an '*' conditionally cause exceptions.  The
+details for these instructions are discussed below.
+
+Instruction-based #VE
+---------------------
+
+- Port I/O (INS, OUTS, IN, OUT)
+- HLT
+- MONITOR, MWAIT
+- WBINVD, INVD
+- VMCALL
+- RDMSR*,WRMSR*
+- CPUID*
+
+Instruction-based #GP
+---------------------
+
+- All VMX instructions: INVEPT, INVVPID, VMCLEAR, VMFUNC, VMLAUNCH,
+  VMPTRLD, VMPTRST, VMREAD, VMRESUME, VMWRITE, VMXOFF, VMXON
+- ENCLS, ENCLU
+- GETSEC
+- RSM
+- ENQCMD
+- RDMSR*,WRMSR*
+
+RDMSR/WRMSR Behavior
+--------------------
+
+MSR access behavior falls into three categories:
+
+- #GP generated
+- #VE generated
+- "Just works"
+
+In general, the #GP MSRs should not be used in guests.  Their use likely
+indicates a bug in the guest.  The guest may try to handle the #GP with a
+hypercall but it is unlikely to succeed.
+
+The #VE MSRs are typically able to be handled by the hypervisor.  Guests
+can make a hypercall to the hypervisor to handle the #VE.
+
+The "just works" MSRs do not need any special guest handling.  They might
+be implemented by directly passing through the MSR to the hardware or by
+trapping and handling in the TDX module.  Other than possibly being slow,
+these MSRs appear to function just as they would on bare metal.
+
+CPUID Behavior
+--------------
+
+For some CPUID leaves and sub-leaves, the virtualized bit fields of CPUID
+return values (in guest EAX/EBX/ECX/EDX) are configurable by the
+hypervisor. For such cases, the Intel TDX module architecture defines two
+virtualization types:
+
+- Bit fields for which the hypervisor controls the value seen by the guest
+  TD.
+
+- Bit fields for which the hypervisor configures the value such that the
+  guest TD either sees their native value or a value of 0.  For these bit
+  fields, the hypervisor can mask off the native values, but it can not
+  turn *on* values.
+
+A #VE is generated for CPUID leaves and sub-leaves that the TDX module does
+not know how to handle. The guest kernel may ask the hypervisor for the
+value with a hypercall.
+
+#VE on Memory Accesses
+======================
+
+There are essentially two classes of TDX memory: private and shared.
+Private memory receives full TDX protections.  Its content is protected
+against access from the hypervisor.  Shared memory is expected to be
+shared between guest and hypervisor and does not receive full TDX
+protections.
+
+A TD guest is in control of whether its memory accesses are treated as
+private or shared.  It selects the behavior with a bit in its page table
+entries.  This helps ensure that a guest does not place sensitive
+information in shared memory, exposing it to the untrusted hypervisor.
+
+#VE on Shared Memory
+--------------------
+
+Access to shared mappings can cause a #VE.  The hypervisor ultimately
+controls whether a shared memory access causes a #VE, so the guest must be
+careful to only reference shared pages it can safely handle a #VE.  For
+instance, the guest should be careful not to access shared memory in the
+#VE handler before it reads the #VE info structure (TDG.VP.VEINFO.GET).
+
+Shared mapping content is entirely controlled by the hypervisor. The guest
+should only use shared mappings for communicating with the hypervisor.
+Shared mappings must never be used for sensitive memory content like kernel
+stacks.  A good rule of thumb is that hypervisor-shared memory should be
+treated the same as memory mapped to userspace.  Both the hypervisor and
+userspace are completely untrusted.
+
+MMIO for virtual devices is implemented as shared memory.  The guest must
+be careful not to access device MMIO regions unless it is also prepared to
+handle a #VE.
+
+#VE on Private Pages
+--------------------
+
+An access to private mappings can also cause a #VE.  Since all kernel
+memory is also private memory, the kernel might theoretically need to
+handle a #VE on arbitrary kernel memory accesses.  This is not feasible, so
+TDX guests ensure that all guest memory has been "accepted" before memory
+is used by the kernel.
+
+A modest amount of memory (typically 512M) is pre-accepted by the firmware
+before the kernel runs to ensure that the kernel can start up without
+being subjected to a #VE.
+
+The hypervisor is permitted to unilaterally move accepted pages to a
+"blocked" state. However, if it does this, page access will not generate a
+#VE.  It will, instead, cause a "TD Exit" where the hypervisor is required
+to handle the exception.
+
+Linux #VE handler
+=================
+
+Just like page faults or #GP's, #VE exceptions can be either handled or be
+fatal.  Typically, an unhandled userspace #VE results in a SIGSEGV.
+An unhandled kernel #VE results in an oops.
+
+Handling nested exceptions on x86 is typically nasty business.  A #VE
+could be interrupted by an NMI which triggers another #VE and hilarity
+ensues.  The TDX #VE architecture anticipated this scenario and includes a
+feature to make it slightly less nasty.
+
+During #VE handling, the TDX module ensures that all interrupts (including
+NMIs) are blocked.  The block remains in place until the guest makes a
+TDG.VP.VEINFO.GET TDCALL.  This allows the guest to control when interrupts
+or a new #VE can be delivered.
+
+However, the guest kernel must still be careful to avoid potential
+#VE-triggering actions (discussed above) while this block is in place.
+While the block is in place, any #VE is elevated to a double fault (#DF)
+which is not recoverable.
+
+MMIO handling
+=============
+
+In non-TDX VMs, MMIO is usually implemented by giving a guest access to a
+mapping which will cause a VMEXIT on access, and then the hypervisor
+emulates the access.  That is not possible in TDX guests because VMEXIT
+will expose the register state to the host. TDX guests don't trust the host
+and can't have their state exposed to the host.
+
+In TDX, MMIO regions typically trigger a #VE exception in the guest.  The
+guest #VE handler then emulates the MMIO instruction inside the guest and
+converts it into a controlled TDCALL to the host, rather than exposing
+guest state to the host.
+
+MMIO addresses on x86 are just special physical addresses. They can
+theoretically be accessed with any instruction that accesses memory.
+However, the kernel instruction decoding method is limited. It is only
+designed to decode instructions like those generated by io.h macros.
+
+MMIO access via other means (like structure overlays) may result in an
+oops.
+
+Shared Memory Conversions
+=========================
+
+All TDX guest memory starts out as private at boot.  This memory can not
+be accessed by the hypervisor.  However, some kernel users like device
+drivers might have a need to share data with the hypervisor.  To do this,
+memory must be converted between shared and private.  This can be
+accomplished using some existing memory encryption helpers:
+
+ * set_memory_decrypted() converts a range of pages to shared.
+ * set_memory_encrypted() converts memory back to private.
+
+Device drivers are the primary user of shared memory, but there's no need
+to touch every driver. DMA buffers and ioremap() do the conversions
+automatically.
+
+TDX uses SWIOTLB for most DMA allocations. The SWIOTLB buffer is
+converted to shared on boot.
+
+For coherent DMA allocation, the DMA buffer gets converted on the
+allocation. Check force_dma_unencrypted() for details.
+
+References
+==========
+
+TDX reference material is collected here:
+
+https://www.intel.com/content/www/us/en/developer/articles/technical/intel-trust-domain-extensions.html
diff --git a/Documentation/x86/x86_64/boot-options.rst b/Documentation/x86/x86_64/boot-options.rst
index 07aa0007f346..03ec9cf01181 100644
--- a/Documentation/x86/x86_64/boot-options.rst
+++ b/Documentation/x86/x86_64/boot-options.rst
@@ -157,15 +157,6 @@ Rebooting
      newer BIOS, or newer board) using this option will ignore the built-in
      quirk table, and use the generic default reboot actions.
 
-Non Executable Mappings
-=======================
-
-  noexec=on|off
-    on
-      Enable(default)
-    off
-      Disable
-
 NUMA
 ====
 
@@ -310,3 +301,17 @@ Miscellaneous
     Do not use GB pages for kernel direct mappings.
   gbpages
     Use GB pages for kernel direct mappings.
+
+
+AMD SEV (Secure Encrypted Virtualization)
+=========================================
+Options relating to AMD SEV, specified via the following format:
+
+::
+
+   sev=option1[,option2]
+
+The available options are:
+
+   debug
+     Enable debug messages.
diff --git a/Documentation/x86/zero-page.rst b/Documentation/x86/zero-page.rst
index f088f5881666..45aa9cceb4f1 100644
--- a/Documentation/x86/zero-page.rst
+++ b/Documentation/x86/zero-page.rst
@@ -19,6 +19,7 @@ Offset/Size	Proto	Name			Meaning
 058/008		ALL	tboot_addr      	Physical address of tboot shared page
 060/010		ALL	ist_info		Intel SpeedStep (IST) BIOS support information
 						(struct ist_info)
+070/008		ALL	acpi_rsdp_addr		Physical address of ACPI RSDP table
 080/010		ALL	hd0_info		hd0 disk parameter, OBSOLETE!!
 090/010		ALL	hd1_info		hd1 disk parameter, OBSOLETE!!
 0A0/010		ALL	sys_desc_table		System description table (struct sys_desc_table),
@@ -27,6 +28,7 @@ Offset/Size	Proto	Name			Meaning
 0C0/004		ALL	ext_ramdisk_image	ramdisk_image high 32bits
 0C4/004		ALL	ext_ramdisk_size	ramdisk_size high 32bits
 0C8/004		ALL	ext_cmd_line_ptr	cmd_line_ptr high 32bits
+13C/004		ALL	cc_blob_address		Physical address of Confidential Computing blob
 140/080		ALL	edid_info		Video mode setup (struct edid_info)
 1C0/020		ALL	efi_info		EFI 32 information (struct efi_info)
 1E0/004		ALL	alt_mem_k		Alternative mem check, in KB